From 3ddd092d63d0b7a6c7ed3be189fa9e8c76fa8196 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 18 Dec 2017 14:42:48 +0000 Subject: [PATCH 01/83] WIP: terms and ordered near --- .../apache/lucene/search/ConjunctionDISI.java | 15 ++ .../apache/lucene/search/FilterWeight.java | 5 + .../apache/lucene/search/IntervalFilter.java | 66 ++++++ .../lucene/search/IntervalFunction.java | 70 +++++++ .../lucene/search/IntervalIterator.java | 74 +++++++ .../apache/lucene/search/IntervalQuery.java | 149 ++++++++++++++ .../apache/lucene/search/IntervalScorer.java | 76 +++++++ .../org/apache/lucene/search/Intervals.java | 191 ++++++++++++++++++ .../apache/lucene/search/LRUQueryCache.java | 5 + .../java/org/apache/lucene/search/Scorer.java | 1 + .../org/apache/lucene/search/TermQuery.java | 15 ++ .../org/apache/lucene/search/TermScorer.java | 1 + .../java/org/apache/lucene/search/Weight.java | 4 + .../lucene/search/TestIntervalQuery.java | 104 ++++++++++ .../apache/lucene/search/TestIntervals.java | 152 ++++++++++++++ 15 files changed, 928 insertions(+) create mode 100644 lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/Intervals.java create mode 100644 lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java create mode 100644 lucene/core/src/test/org/apache/lucene/search/TestIntervals.java diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java index 780e854033a8..a5eabcc0266a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java 
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java @@ -71,6 +71,21 @@ public static DocIdSetIterator intersectIterators(List iterato return createConjunction(allIterators, twoPhaseIterators); } + public static DocIdSetIterator intersectIntervals(List iterators) { + if (iterators.size() < 2) { + throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators"); + } + final List allIterators = new ArrayList<>(); + final List twoPhaseIterators = new ArrayList<>(); + for (IntervalIterator iterator : iterators) { + if (iterator == null) + return DocIdSetIterator.empty(); + addIterator(iterator.approximation(), allIterators, twoPhaseIterators); + } + + return createConjunction(allIterators, twoPhaseIterators); + } + /** Create a conjunction over the provided {@link Spans}. Note that the * returned {@link DocIdSetIterator} might leverage two-phase iteration in * which case it is possible to retrieve the {@link TwoPhaseIterator} using diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java index 925c9534f898..bdb9108bcc42 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java @@ -60,6 +60,11 @@ public boolean isCacheable(LeafReaderContext ctx) { return in.isCacheable(ctx); } + @Override + public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { + return in.intervals(context, field); + } + @Override public void extractTerms(Set terms) { in.extractTerms(terms); diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java new file mode 100644 index 000000000000..4b0812120db5 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; + +public abstract class IntervalFilter implements IntervalIterator { + + private final IntervalIterator in; + + public IntervalFilter(IntervalIterator in) { + this.in = in; + } + + protected abstract boolean accept(); + + @Override + public final int nextInterval() throws IOException { + int next; + do { + next = in.nextInterval(); + } + while (accept() == false && next != Intervals.NO_MORE_INTERVALS); + return next; + } + + @Override + public final int start() { + return in.start(); + } + + @Override + public final int end() { + return in.end(); + } + + @Override + public int innerWidth() { + return in.innerWidth(); + } + + @Override + public DocIdSetIterator approximation() { + return in.approximation(); + } + + @Override + public void advanceTo(int doc) throws IOException { + in.advanceTo(doc); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java new file mode 100644 index 000000000000..0db038febea1 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.util.List; +import java.util.Objects; +import java.util.function.Function; + +public abstract class IntervalFunction implements Function, IntervalIterator> { + + @Override + public abstract int hashCode(); + + @Override + public abstract boolean equals(Object obj); + + @Override + public abstract String toString(); + + public static class OrderedNearFunction extends IntervalFunction { + + public OrderedNearFunction(int minWidth, int maxWidth) { + this.minWidth = minWidth; + this.maxWidth = maxWidth; + } + + final int minWidth; + final int maxWidth; + + @Override + public IntervalIterator apply(List intervalIterators) { + return Intervals.innerWidthFilter(Intervals.orderedIntervalIterator(intervalIterators), minWidth, maxWidth); + } + + @Override + public String toString() { + return "ONEAR[" + minWidth + "/" + maxWidth + "]"; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + OrderedNearFunction that = (OrderedNearFunction) o; + return minWidth == that.minWidth && + maxWidth == that.maxWidth; + } + + @Override + public int hashCode() { + return 
Objects.hash(minWidth, maxWidth); + } + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java new file mode 100644 index 000000000000..af4e0520a86e --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; + +import org.apache.lucene.index.PostingsEnum; + +public interface IntervalIterator { + + DocIdSetIterator approximation(); + + int start(); + + int end(); + + int innerWidth(); + + void advanceTo(int doc) throws IOException; + + int nextInterval() throws IOException; + + default float score() { + return (float) (1.0 / (1.0 + (end() - start()))); + } + + IntervalIterator EMPTY = new IntervalIterator() { + @Override + public DocIdSetIterator approximation() { + return DocIdSetIterator.empty(); + } + + @Override + public int start() { + return -1; + } + + @Override + public int end() { + return -1; + } + + @Override + public int innerWidth() { + return 0; + } + + @Override + public void advanceTo(int doc) throws IOException { + + } + + @Override + public int nextInterval() throws IOException { + return Intervals.NO_MORE_INTERVALS; + } + }; + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java new file mode 100644 index 000000000000..068912e93855 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.similarities.Similarity; + +public final class IntervalQuery extends Query { + + private final String field; + private final List subQueries; + private final IntervalFunction iteratorFunction; + + public static IntervalQuery orderedNearQuery(String field, int width, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(0, width)); + } + + protected IntervalQuery(String field, List subQueries, IntervalFunction iteratorFunction) { + this.field = field; + this.subQueries = subQueries; + this.iteratorFunction = iteratorFunction; + } + + public String getField() { + return field; + } + + @Override + public String toString(String field) { + return iteratorFunction.toString() + subQueries.stream().map(Object::toString) + .collect(Collectors.joining(",", "(", ")")); + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + List subWeights = new ArrayList<>(); + for (Query q : subQueries) { + subWeights.add(searcher.createWeight(q, ScoreMode.COMPLETE_NO_SCORES, boost)); + } + return new IntervalWeight(this, subWeights, buildSimScorer(searcher, subWeights), scoreMode); + } + + private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, List subWeights) { + // nocommit + return new Similarity.SimScorer(field) { + @Override + public float score(float freq, long norm) { + return 1; + } + }; + } + + @Override + public boolean equals(Object o) { + if (this == o) return 
true; + if (o == null || getClass() != o.getClass()) return false; + IntervalQuery that = (IntervalQuery) o; + return Objects.equals(field, that.field) && + Objects.equals(subQueries, that.subQueries) && + Objects.equals(iteratorFunction, that.iteratorFunction); + } + + @Override + public int hashCode() { + return Objects.hash(field, subQueries, iteratorFunction); + } + + private class IntervalWeight extends Weight { + + final List subWeights; + final Similarity.SimScorer simScorer; + final ScoreMode scoreMode; + + public IntervalWeight(Query query, List subWeights, Similarity.SimScorer simScorer, ScoreMode scoreMode) { + super(query); + this.subWeights = subWeights; + this.simScorer = simScorer; + this.scoreMode = scoreMode; + } + + @Override + public void extractTerms(Set terms) { + for (Weight w : subWeights) { + w.extractTerms(terms); + } + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + Scorer scorer = scorer(context); + if (scorer != null && scorer.iterator().advance(doc) == doc) { + return Explanation.match(scorer.score(), "Intervals match"); // nocommit improve this + } + return Explanation.noMatch("No matching intervals"); + } + + @Override + public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { + List subIntervals = new ArrayList<>(); + for (Weight w : subWeights) { + subIntervals.add(w.intervals(context, field)); + } + return IntervalQuery.this.iteratorFunction.apply(subIntervals); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + IntervalIterator intervals = intervals(context, field); + if (intervals == IntervalIterator.EMPTY) + return null; + LeafSimScorer leafScorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.POSITIVE_INFINITY); // nocommit + return new IntervalScorer(this, intervals, leafScorer); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + for 
(Weight w : subWeights) { + if (w.isCacheable(ctx) == false) + return false; + } + return true; + } + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java new file mode 100644 index 000000000000..24c7585d44cd --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; + +class IntervalScorer extends Scorer { + + private final IntervalIterator intervals; + private final DocIdSetIterator approximation; + private final LeafSimScorer simScorer; + + protected IntervalScorer(Weight weight, IntervalIterator intervals, LeafSimScorer simScorer) { + super(weight); + this.intervals = intervals; + this.approximation = intervals.approximation(); + this.simScorer = simScorer; + } + + @Override + public int docID() { + return approximation.docID(); + } + + @Override + public float score() throws IOException { + float freq = 0; + do { + freq += intervals.score(); + } while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS); + return simScorer.score(docID(), freq); + } + + @Override + public DocIdSetIterator iterator() { + return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); + } + + @Override + public TwoPhaseIterator twoPhaseIterator() { + return new TwoPhaseIterator(approximation) { + @Override + public boolean matches() throws IOException { + intervals.advanceTo(docID()); + return intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; + } + + @Override + public float matchCost() { + return 0; + } + }; + } + + @Override + public float getMaxScore(int upTo) throws IOException { + return Float.POSITIVE_INFINITY; + } + + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java new file mode 100644 index 000000000000..db67f0909673 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.index.PostingsEnum; + +public final class Intervals { + + public static final int NO_MORE_INTERVALS = Integer.MAX_VALUE; + + public static IntervalIterator widthFilter(IntervalIterator in, int minWidth, int maxWidth) { + return new IntervalFilter(in) { + @Override + protected boolean accept() { + int width = end() - start(); + return width >= minWidth && width <= maxWidth; + } + }; + } + + public static IntervalIterator innerWidthFilter(IntervalIterator in, int minWidth, int maxWidth) { + return new IntervalFilter(in) { + @Override + protected boolean accept() { + int width = innerWidth(); + return width >= minWidth && width <= maxWidth; + } + }; + } + + public static IntervalIterator termIterator(PostingsEnum pe) { + return new TermIntervalIterator(pe); + } + + private static class TermIntervalIterator implements IntervalIterator { + + public TermIntervalIterator(PostingsEnum pe) { + this.pe = pe; + } + + private final PostingsEnum pe; + + int upTo = -1; + int pos = -1; + + @Override + public DocIdSetIterator approximation() { + return pe; + } + + @Override + public int start() { + return pos; + } + + @Override + public int end() { + return pos; + } + + @Override + public int innerWidth() { + return 0; + } + + @Override + public void advanceTo(int doc) throws 
IOException { + pos = -1; + if (pe.docID() == doc || (pe.docID() < doc && pe.advance(doc) == doc)) { + upTo = pe.freq(); + } + else { + upTo = -1; + } + } + + @Override + public int nextInterval() throws IOException { + if (upTo <= 0) { + return pos = NO_MORE_INTERVALS; + } + upTo--; + return pos = pe.nextPosition(); + } + + @Override + public String toString() { + return pe.docID() + "[" + pos + "]"; + } + } + + public static IntervalIterator orderedIntervalIterator(List subIterators) { + for (IntervalIterator it : subIterators) { + if (it == IntervalIterator.EMPTY) + return IntervalIterator.EMPTY; + } + return new OrderedIntervalIterator(subIterators); + } + + private static class OrderedIntervalIterator implements IntervalIterator { + + final List subIntervals; + final DocIdSetIterator approximation; + + int start; + int end; + int innerWidth; + int i; + + private OrderedIntervalIterator(List subIntervals) { + this.subIntervals = subIntervals; + this.approximation = ConjunctionDISI.intersectIntervals(subIntervals); + } + + @Override + public DocIdSetIterator approximation() { + return approximation; + } + + @Override + public int start() { + return start; + } + + @Override + public int end() { + return end; + } + + @Override + public int innerWidth() { + return innerWidth; + } + + @Override + public void advanceTo(int doc) throws IOException { + for (IntervalIterator it : subIntervals) { + it.advanceTo(doc); + } + subIntervals.get(0).nextInterval(); + i = 1; + start = end = innerWidth = Integer.MIN_VALUE; + } + + @Override + public int nextInterval() throws IOException { + start = end = NO_MORE_INTERVALS; + int b = Integer.MAX_VALUE; + while (true) { + while (true) { + if (subIntervals.get(i - 1).end() >= b) + return start; + if (i == subIntervals.size() || subIntervals.get(i).start() > subIntervals.get(i - 1).end()) + break; + do { + if (subIntervals.get(i).end() >= b || subIntervals.get(i).nextInterval() == NO_MORE_INTERVALS) + return start; + } + while 
(subIntervals.get(i).start() <= subIntervals.get(i - 1).end()); + i++; + } + start = subIntervals.get(0).start(); + end = subIntervals.get(subIntervals.size() - 1).end(); + b = subIntervals.get(subIntervals.size() - 1).start(); + innerWidth = b - subIntervals.get(0).end() - 1; + i = 1; + if (subIntervals.get(0).nextInterval() == NO_MORE_INTERVALS) + return start; + } + } + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java index beb73ad11159..f9b5bda6ce45 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java @@ -714,6 +714,11 @@ private boolean shouldCache(LeafReaderContext context) throws IOException { && leavesToCache.test(context); } + @Override + public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { + return in.intervals(context, field); + } + @Override public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { if (used.compareAndSet(false, true)) { diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java index 81624ccac9d8..f1da1fb50cf6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java @@ -178,4 +178,5 @@ public int advanceShallow(int target) throws IOException { * included and {@code upTo} included. 
*/ public abstract float getMaxScore(int upTo) throws IOException; + } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index f1f44154f554..da06023b7c30 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermStates; @@ -85,6 +86,20 @@ public String toString() { return "weight(" + TermQuery.this + ")"; } + @Override + public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { + if (term.field().equals(field) == false) { + return null; + } + assert termStates == null || termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);; + final TermsEnum termsEnum = getTermsEnum(context); + if (termsEnum == null) { + return null; + } + PostingsEnum pe = termsEnum.postings(null, PostingsEnum.POSITIONS); + return Intervals.termIterator(pe); + } + @Override public Scorer scorer(LeafReaderContext context) throws IOException { assert termStates == null || termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);; diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index d51626fda8c1..1d9d5e670ce2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -27,6 +27,7 @@ /** Expert: A Scorer for documents matching a Term. */ final class TermScorer extends Scorer { + private final PostingsEnum postingsEnum; private final ImpactsEnum impactsEnum; private final DocIdSetIterator iterator; diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 7853ccf2465b..b98a17f9e02b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -102,6 +102,10 @@ public final Query getQuery() { */ public abstract Scorer scorer(LeafReaderContext context) throws IOException; + public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { + return null; + } + /** * Optional method. * Get a {@link ScorerSupplier}, which allows to know the cost of the {@link Scorer} diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java new file mode 100644 index 000000000000..bc85bbc58cb7 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestIntervalQuery extends LuceneTestCase { + + private IndexSearcher searcher; + private IndexReader reader; + private Directory directory; + + public static final String field = "field"; + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); + for (int i = 0; i < docFields.length; i++) { + Document doc = new Document(); + doc.add(newTextField(field, docFields[i], Field.Store.YES)); + writer.addDocument(doc); + } + reader = writer.getReader(); + writer.close(); + searcher = newSearcher(reader); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + directory.close(); + super.tearDown(); + } + + private String[] docFields = { + "w1 w2 w3 w4 w5", + "w1 w3 w2 w3", + "w1 xx w2 yy w3", + "w1 w3 xx w2 yy w3", + "w2 w1", + "w2 w1 w3 w2" + }; + + private void checkHits(Query query, int[] results) throws IOException { + CheckHits.checkHits(random(), query, field, searcher, results); + } + + public void testOrderedNearQueryWidth0() throws IOException { + checkHits(IntervalQuery.orderedNearQuery(field, 0, new TermQuery(new Term(field, "w1")), + new TermQuery(new Term(field, "w2"))), + new int[]{ 0 }); + } + + public void testOrderedNearQueryWidth1() throws IOException { + 
checkHits(IntervalQuery.orderedNearQuery(field, 1, new TermQuery(new Term(field, "w1")), + new TermQuery(new Term(field, "w2"))), + new int[]{ 0, 1, 2, 5 }); + } + + public void testOrderedNearQueryWidth2() throws IOException { + checkHits(IntervalQuery.orderedNearQuery(field, 2, new TermQuery(new Term(field, "w1")), + new TermQuery(new Term(field, "w2"))), + new int[]{ 0, 1, 2, 3, 5 }); + } + + public void testNestedOrderedNearQuery() throws IOException { + // onear/1(w1, onear/2(w2, w3)) + Query q = IntervalQuery.orderedNearQuery(field, 1, + new TermQuery(new Term(field, "w1")), + IntervalQuery.orderedNearQuery(field, 2, + new TermQuery(new Term(field, "w2")), + new TermQuery(new Term(field, "w3"))) + ); + + checkHits(q, new int[]{ 0, 1, 2 }); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java new file mode 100644 index 000000000000..b38e27bba222 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +public class TestIntervals extends LuceneTestCase { + + private static String field1_docs[] = { + "Nothing of interest to anyone here", + "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. Some like it hot, some like it cold, some like it in the pot nine days old", + "Pease porridge cold, pease porridge hot, pease porridge in the pot nine days old. Some like it cold, some like it hot, some like it in the pot nine days old", + "Nor here, nowt hot going on in this one", + "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. 
Some like it hot, some like it cold", + "Porridge is great" + }; + + private static String field2_docs[] = { + "In Xanadu did Kubla Khan a stately pleasure dome decree", + "Where Alph the sacred river ran through caverns measureless to man", + "Down to a sunless sea", + "So thrice five miles of fertile ground", + "With walls and towers were girdled round", + "Which was nice" + }; + + private static Directory directory; + private static IndexSearcher searcher; + private static Analyzer analyzer = new StandardAnalyzer(); + + @BeforeClass + public static void setupIndex() throws IOException { + directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(analyzer)); + for (int i = 0; i < field1_docs.length; i++) { + Document doc = new Document(); + doc.add(new TextField("field1", field1_docs[i], Field.Store.NO)); + doc.add(new TextField("field2", field2_docs[i], Field.Store.NO)); + doc.add(new NumericDocValuesField("id", i)); + writer.addDocument(doc); + } + writer.close(); + searcher = new IndexSearcher(DirectoryReader.open(directory)); + } + + @AfterClass + public static void teardownIndex() throws IOException { + IOUtils.close(searcher.getIndexReader(), directory); + } + + public void testTermQueryIntervals() throws IOException { + int[][] expected = new int[][]{ + {}, + { 1, 4, 7 }, + { 1, 4, 7 }, + {}, + { 1, 4, 7 }, + { 0 } + }; + + Weight weight = searcher.createNormalizedWeight(new TermQuery(new Term("field1", "porridge")), ScoreMode.COMPLETE); + for (LeafReaderContext ctx : searcher.leafContexts) { + assertNull(weight.intervals(ctx, "field2")); + NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); + IntervalIterator intervals = weight.intervals(ctx, "field1"); + for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { + ids.advance(doc); + int id = (int) ids.longValue(); + intervals.advanceTo(doc); + int i = 0, pos; + while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) 
{ + assertEquals(expected[id][i], pos); + assertEquals(expected[id][i], intervals.start()); + assertEquals(expected[id][i], intervals.end()); + i++; + } + assertEquals(expected[id].length, i); + } + } + + } + + public void testOrderedNearIntervals() throws IOException { + + int[][] expected = new int[][]{ + {}, + { 0, 2, 6, 17 }, + { 3, 5, 6, 21 }, + {}, + { 0, 2, 6, 17 }, + { } + }; + + Weight peaseWeight = searcher.createNormalizedWeight(new TermQuery(new Term("field1", "pease")), ScoreMode.COMPLETE); + Weight hotWeight = searcher.createNormalizedWeight(new TermQuery(new Term("field1", "hot")), ScoreMode.COMPLETE); + for (LeafReaderContext ctx : searcher.leafContexts) { + NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); + IntervalIterator intervals = Intervals.orderedIntervalIterator( + Arrays.asList(peaseWeight.intervals(ctx, "field1"), hotWeight.intervals(ctx, "field1")) + ); + for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { + ids.advance(doc); + int id = (int) ids.longValue(); + intervals.advanceTo(doc); + int i = 0, pos; + while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { + assertEquals(expected[id][i], pos); + assertEquals(expected[id][i], intervals.start()); + assertEquals(expected[id][i + 1], intervals.end()); + i += 2; + } + assertEquals(expected[id].length, i); + } + } + + } + +} From d73b199deaa1ac279553981ec09d107300ac693a Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 20 Feb 2018 14:25:51 +0000 Subject: [PATCH 02/83] WIP --- .../lucene/document/RangeFieldQuery.java | 6 +- .../SortedNumericDocValuesRangeQuery.java | 2 +- .../SortedSetDocValuesRangeQuery.java | 2 +- .../lucene/index/FrozenBufferedUpdates.java | 2 +- .../org/apache/lucene/index/PostingsEnum.java | 4 + .../java/org/apache/lucene/index/Sorter.java | 25 ----- .../search/BlockMaxConjunctionScorer.java | 4 + .../apache/lucene/search/BooleanWeight.java | 17 +-- .../lucene/search/CachingCollector.java | 5 + 
.../apache/lucene/search/ConjunctionDISI.java | 15 --- .../lucene/search/ConjunctionScorer.java | 13 +-- .../lucene/search/ConstantScoreQuery.java | 8 +- .../lucene/search/ConstantScoreScorer.java | 5 + .../lucene/search/ConstantScoreWeight.java | 3 +- .../org/apache/lucene/search/DisiWrapper.java | 17 +++ .../lucene/search/DisjunctionMaxQuery.java | 4 +- .../lucene/search/DisjunctionScorer.java | 5 + .../search/DocValuesFieldExistsQuery.java | 2 +- .../lucene/search/DocValuesRewriteMethod.java | 2 +- .../lucene/search/DoubleValuesSource.java | 3 +- .../lucene/search/ExactPhraseScorer.java | 5 + .../org/apache/lucene/search/FakeScorer.java | 5 + .../apache/lucene/search/FilterScorer.java | 5 + .../apache/lucene/search/FilterWeight.java | 9 +- .../lucene/search/IndexOrDocValuesQuery.java | 10 +- .../apache/lucene/search/IntervalFilter.java | 9 +- .../lucene/search/IntervalIterator.java | 10 +- .../apache/lucene/search/IntervalQuery.java | 21 ++-- .../apache/lucene/search/IntervalScorer.java | 15 ++- .../org/apache/lucene/search/Intervals.java | 105 ++++++++++++++---- .../apache/lucene/search/LRUQueryCache.java | 21 ++-- .../lucene/search/MatchAllDocsQuery.java | 2 +- .../lucene/search/MatchNoDocsQuery.java | 2 +- .../search/MinShouldMatchSumScorer.java | 5 + .../lucene/search/MultiPhraseQuery.java | 6 +- .../MultiTermQueryConstantScoreWrapper.java | 4 +- .../lucene/search/NormsFieldExistsQuery.java | 2 +- .../org/apache/lucene/search/PhraseQuery.java | 6 +- .../apache/lucene/search/PointInSetQuery.java | 2 +- .../apache/lucene/search/PointRangeQuery.java | 6 +- .../apache/lucene/search/QueryRescorer.java | 3 +- .../apache/lucene/search/ReqExclScorer.java | 5 + .../apache/lucene/search/ReqOptSumScorer.java | 11 ++ .../java/org/apache/lucene/search/Scorer.java | 2 + .../lucene/search/SloppyPhraseScorer.java | 5 + .../apache/lucene/search/SynonymQuery.java | 7 +- .../apache/lucene/search/TermInSetQuery.java | 4 +- .../org/apache/lucene/search/TermQuery.java | 20 +--- 
.../org/apache/lucene/search/TermScorer.java | 17 ++- .../org/apache/lucene/search/WANDScorer.java | 5 + .../java/org/apache/lucene/search/Weight.java | 17 ++- .../lucene/search/spans/SpanScorer.java | 6 + .../lucene/search/spans/SpanWeight.java | 4 +- .../lucene/search/TestIntervalQuery.java | 12 +- .../apache/lucene/search/TestIntervals.java | 84 +++++++------- .../apache/lucene/search/AssertingScorer.java | 5 + .../search/BulkScorerWrapperScorer.java | 5 + .../org/apache/lucene/search/QueryUtils.java | 15 +-- .../lucene/search/ScorerIndexSearcher.java | 3 +- .../search/spans/AssertingSpanWeight.java | 4 +- 60 files changed, 366 insertions(+), 262 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java index a24b7cdfae58..d507da963f4c 100644 --- a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java @@ -290,7 +290,7 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { LeafReader reader = context.reader(); PointValues values = reader.getPointValues(field); if (values == null) { @@ -350,8 +350,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context); + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context, postings); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java index 
246b50f3dab6..0c0efdf56660 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java @@ -102,7 +102,7 @@ public boolean isCacheable(LeafReaderContext ctx) { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { SortedNumericDocValues values = getValues(context.reader(), field); if (values == null) { return null; diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java index de7c11b1cc9a..cd6cfadac7f6 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java @@ -107,7 +107,7 @@ public Query rewrite(IndexReader reader) throws IOException { public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { SortedSetDocValues values = getValues(context.reader(), field); if (values == null) { return null; diff --git a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java index 202bf2cc49af..f25d7699bb88 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java @@ -686,7 +686,7 @@ private long applyQueryDeletes(BufferedUpdatesStream.SegmentState[] segStates) t final IndexSearcher searcher = new IndexSearcher(readerContext.reader()); 
searcher.setQueryCache(null); final Weight weight = searcher.createNormalizedWeight(query, ScoreMode.COMPLETE_NO_SCORES); - final Scorer scorer = weight.scorer(readerContext); + final Scorer scorer = weight.scorer(readerContext, PostingsEnum.NONE); if (scorer != null) { final DocIdSetIterator it = scorer.iterator(); diff --git a/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java index fdd32a9f2fe0..bb93268ff92b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java @@ -63,6 +63,10 @@ public static boolean featureRequested(int flags, short feature) { return (flags & feature) == feature; } + public static short highest(short a, short b) { + return (short) Math.max(a, b); + } + private AttributeSource atts = null; /** Sole constructor. (For invocation by subclass diff --git a/lucene/core/src/java/org/apache/lucene/index/Sorter.java b/lucene/core/src/java/org/apache/lucene/index/Sorter.java index c47f9a118abb..a081ea7aaf3b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/Sorter.java +++ b/lucene/core/src/java/org/apache/lucene/index/Sorter.java @@ -445,30 +445,5 @@ public String getID() { public String toString() { return getID(); } - - static final Scorer FAKESCORER = new Scorer(null) { - - float score; - int doc = -1; - - @Override - public int docID() { - return doc; - } - - public DocIdSetIterator iterator() { - throw new UnsupportedOperationException(); - } - - @Override - public float score() throws IOException { - return score; - } - - @Override - public float getMaxScore(int upTo) throws IOException { - return Float.POSITIVE_INFINITY; - } - }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java index 070b6c40f025..2c625643ef15 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java @@ -240,4 +240,8 @@ public Collection getChildren() { return children; } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java index fffdd09093f1..ea1ffed6594a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java @@ -27,6 +27,7 @@ import java.util.Set; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.similarities.Similarity; @@ -112,7 +113,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio // contributions to the score to floats), so in order to make sure that // explanations have the same value as the score, we pull a scorer and // use it to compute the score. - Scorer scorer = scorer(context); + Scorer scorer = scorer(context, PostingsEnum.NONE); int advanced = scorer.iterator().advance(doc); assert advanced == doc; return Explanation.match(scorer.score(), "sum of:", subs); @@ -210,7 +211,7 @@ private BulkScorer requiredBulkScorer(LeafReaderContext context) throws IOExcept /** Try to build a boolean scorer for this weight. Returns null if {@link BooleanScorer} * cannot be used. 
*/ - BulkScorer booleanScorer(LeafReaderContext context) throws IOException { + BulkScorer booleanScorer(LeafReaderContext context, short postings) throws IOException { final int numOptionalClauses = query.getClauses(Occur.SHOULD).size(); final int numRequiredClauses = query.getClauses(Occur.MUST).size() + query.getClauses(Occur.FILTER).size(); @@ -262,7 +263,7 @@ BulkScorer booleanScorer(LeafReaderContext context) throws IOException { for (Weight w : weights) { BooleanClause c = cIter.next(); if (c.isProhibited()) { - Scorer scorer = w.scorer(context); + Scorer scorer = w.scorer(context, postings); if (scorer != null) { prohibited.add(scorer); } @@ -290,7 +291,7 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { // so that we can dynamically prune non-competitive hits. return super.bulkScorer(context); } - final BulkScorer bulkScorer = booleanScorer(context); + final BulkScorer bulkScorer = booleanScorer(context, PostingsEnum.NONE); if (bulkScorer != null) { // bulk scoring is applicable, use it return bulkScorer; @@ -301,8 +302,8 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context); + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context, postings); if (scorerSupplier == null) { return null; } @@ -325,7 +326,7 @@ public boolean isCacheable(LeafReaderContext ctx) { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { int minShouldMatch = query.getMinimumNumberShouldMatch(); final Map> scorers = new EnumMap<>(Occur.class); @@ -336,7 +337,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti Iterator cIter = 
query.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); - ScorerSupplier subScorer = w.scorerSupplier(context); + ScorerSupplier subScorer = w.scorerSupplier(context, postings); if (subScorer == null) { if (c.isRequired()) { return null; diff --git a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java index 3bed88dd9980..ae705455ecfa 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java @@ -64,6 +64,11 @@ public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); } + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } + @Override public final float score() { return score; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java index a5eabcc0266a..780e854033a8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java @@ -71,21 +71,6 @@ public static DocIdSetIterator intersectIterators(List iterato return createConjunction(allIterators, twoPhaseIterators); } - public static DocIdSetIterator intersectIntervals(List iterators) { - if (iterators.size() < 2) { - throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators"); - } - final List allIterators = new ArrayList<>(); - final List twoPhaseIterators = new ArrayList<>(); - for (IntervalIterator iterator : iterators) { - if (iterator == null) - return DocIdSetIterator.empty(); - addIterator(iterator.approximation(), allIterators, twoPhaseIterators); - } - - return createConjunction(allIterators, twoPhaseIterators); - } - /** Create a conjunction over the provided {@link Spans}. 
Note that the * returned {@link DocIdSetIterator} might leverage two-phase iteration in * which case it is possible to retrieve the {@link TwoPhaseIterator} using diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java index 7a1b9563721b..f820cd0aaa8e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java @@ -101,14 +101,9 @@ public Collection getChildren() { return children; } - static final class DocsAndFreqs { - final long cost; - final DocIdSetIterator iterator; - int doc = -1; - - DocsAndFreqs(DocIdSetIterator iterator) { - this.iterator = iterator; - this.cost = iterator.cost(); - } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java index 464cde6a45f9..abdb85953dd8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -125,8 +125,8 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { - ScorerSupplier innerScorerSupplier = innerWeight.scorerSupplier(context); + public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { + ScorerSupplier innerScorerSupplier = innerWeight.scorerSupplier(context, postings); if (innerScorerSupplier == null) { return null; } @@ -159,8 +159,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context); + public Scorer scorer(LeafReaderContext context, short postings) 
throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context, postings); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java index 45a6bdbad041..56adcf390553 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java @@ -63,6 +63,11 @@ public DocIdSetIterator iterator() { return disi; } + @Override + public IntervalIterator intervals(String field) { + return null; + } + @Override public TwoPhaseIterator twoPhaseIterator() { return twoPhaseIterator; diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java index 671ec7103782..57316b9f7e4a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java @@ -21,6 +21,7 @@ import java.util.Set; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; /** @@ -53,7 +54,7 @@ protected final float score() { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - final Scorer s = scorer(context); + final Scorer s = scorer(context, PostingsEnum.NONE); final boolean exists; if (s == null) { exists = false; diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index fac9418010f4..b8891b7c0ce2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -81,5 +81,22 @@ public DisiWrapper(Spans spans) { this.lastApproxNonMatchDoc = -2; this.lastApproxMatchDoc = -2; } + + public 
DisiWrapper(DocIdSetIterator disi) { + this.scorer = null; + this.spans = null; + this.iterator = disi; + this.cost = iterator.cost(); + this.doc = -1; + this.twoPhaseView = TwoPhaseIterator.unwrap(disi); + if (twoPhaseView != null) { + approximation = twoPhaseView.approximation(); + matchCost = twoPhaseView.matchCost(); + } + else { + approximation = iterator; + matchCost = 0f; + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index 1e67cb150465..552dff1283a5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -120,11 +120,11 @@ public void extractTerms(Set terms) { /** Create the scorer used to score our associated DisjunctionMaxQuery */ @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { List scorers = new ArrayList<>(); for (Weight w : weights) { // we will advance() subscorers - Scorer subScorer = w.scorer(context); + Scorer subScorer = w.scorer(context, postings); if (subScorer != null) { scorers.add(subScorer); } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index 147b993f2d9f..f69fd936f453 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -180,6 +180,11 @@ public final float score() throws IOException { return score(getSubMatches()); } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit + } + /** Compute the score for the given linked list of scorers. 
*/ protected abstract float score(DisiWrapper topList) throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java index 009f11cf116f..bbf9c3ced9a3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java @@ -65,7 +65,7 @@ public String toString(String field) { public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { FieldInfos fieldInfos = context.reader().getFieldInfos(); FieldInfo fieldInfo = fieldInfos.fieldInfo(field); if (fieldInfo == null) { diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java index 5d591983fab0..a141cbe64a72 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java @@ -75,7 +75,7 @@ public final int hashCode() { public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field); TermsEnum termsEnum = query.getTermsEnum(new Terms() { diff --git a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java index 
3938d3f9ba22..d951fc45db83 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java @@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PostingsEnum; /** * Base class for producing {@link DoubleValues} @@ -577,7 +578,7 @@ private WeightDoubleValuesSource(Weight weight) { @Override public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { - Scorer scorer = weight.scorer(ctx); + Scorer scorer = weight.scorer(ctx, PostingsEnum.NONE); if (scorer == null) return DoubleValues.EMPTY; DocIdSetIterator it = scorer.iterator(); diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index d7c4f9f6e2b8..d68f8557dd10 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -125,6 +125,11 @@ public float getMaxScore(int upTo) throws IOException { return docScorer.maxScore(); } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit + } + /** Advance the given pos enum to the first doc on or after {@code target}. * Return {@code false} if the enum was exhausted before reaching * {@code target} and {@code true} otherwise. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java index c8b34381b2b2..1fcac3a05107 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java @@ -45,6 +45,11 @@ public float getMaxScore(int upTo) throws IOException { return Float.POSITIVE_INFINITY; } + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } + @Override public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java index 7bcb1ce4a64b..6de7e107300e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java @@ -67,6 +67,11 @@ public final int docID() { return in.docID(); } + @Override + public IntervalIterator intervals(String field) { + return in.intervals(field); + } + @Override public final DocIdSetIterator iterator() { return in.iterator(); diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java index bdb9108bcc42..3ac351029097 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java @@ -60,11 +60,6 @@ public boolean isCacheable(LeafReaderContext ctx) { return in.isCacheable(ctx); } - @Override - public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { - return in.intervals(context, field); - } - @Override public void extractTerms(Set terms) { in.extractTerms(terms); @@ -76,8 +71,8 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public Scorer scorer(LeafReaderContext 
context) throws IOException { - return in.scorer(context); + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + return in.scorer(context, postings); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java index f89924d16054..50b04523baa3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java @@ -133,9 +133,9 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { - final ScorerSupplier indexScorerSupplier = indexWeight.scorerSupplier(context); - final ScorerSupplier dvScorerSupplier = dvWeight.scorerSupplier(context); + public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { + final ScorerSupplier indexScorerSupplier = indexWeight.scorerSupplier(context, postings); + final ScorerSupplier dvScorerSupplier = dvWeight.scorerSupplier(context, postings); if (indexScorerSupplier == null || dvScorerSupplier == null) { return null; } @@ -162,8 +162,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context); + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context, postings); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index 4b0812120db5..852ac8070160 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -55,12 +55,7 @@ 
public int innerWidth() { } @Override - public DocIdSetIterator approximation() { - return in.approximation(); - } - - @Override - public void advanceTo(int doc) throws IOException { - in.advanceTo(doc); + public void reset() throws IOException { + in.reset(); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index af4e0520a86e..a85c24011e1f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -23,15 +23,13 @@ public interface IntervalIterator { - DocIdSetIterator approximation(); - int start(); int end(); int innerWidth(); - void advanceTo(int doc) throws IOException; + void reset() throws IOException; int nextInterval() throws IOException; @@ -40,10 +38,6 @@ default float score() { } IntervalIterator EMPTY = new IntervalIterator() { - @Override - public DocIdSetIterator approximation() { - return DocIdSetIterator.empty(); - } @Override public int start() { @@ -61,7 +55,7 @@ public int innerWidth() { } @Override - public void advanceTo(int doc) throws IOException { + public void reset() { } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index 068912e93855..16de6fc9c1a8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -26,6 +26,7 @@ import java.util.stream.Collectors; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.Similarity; @@ -111,7 +112,7 @@ public void extractTerms(Set terms) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context); + Scorer scorer = 
scorer(context, PostingsEnum.POSITIONS); if (scorer != null && scorer.iterator().advance(doc) == doc) { return Explanation.match(scorer.score(), "Intervals match"); // nocommit improve this } @@ -119,21 +120,17 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { List subIntervals = new ArrayList<>(); + List disis = new ArrayList<>(); for (Weight w : subWeights) { - subIntervals.add(w.intervals(context, field)); + Scorer scorer = w.scorer(context, PostingsEnum.POSITIONS); + disis.add(scorer.iterator()); + subIntervals.add(scorer.intervals(field)); } - return IntervalQuery.this.iteratorFunction.apply(subIntervals); - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - IntervalIterator intervals = intervals(context, field); - if (intervals == IntervalIterator.EMPTY) - return null; + IntervalIterator intervals = IntervalQuery.this.iteratorFunction.apply(subIntervals); LeafSimScorer leafScorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.POSITIVE_INFINITY); // nocommit - return new IntervalScorer(this, intervals, leafScorer); + return new IntervalScorer(this, field, ConjunctionDISI.intersectIterators(disis), intervals, leafScorer); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index 24c7585d44cd..b25fa3e6f31e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -22,14 +22,16 @@ class IntervalScorer extends Scorer { private final IntervalIterator intervals; + private final String field; private final DocIdSetIterator approximation; private final LeafSimScorer 
simScorer; - protected IntervalScorer(Weight weight, IntervalIterator intervals, LeafSimScorer simScorer) { + protected IntervalScorer(Weight weight, String field, DocIdSetIterator approximation, IntervalIterator intervals, LeafSimScorer simScorer) { super(weight); this.intervals = intervals; - this.approximation = intervals.approximation(); + this.approximation = approximation; this.simScorer = simScorer; + this.field = field; } @Override @@ -46,6 +48,13 @@ public float score() throws IOException { return simScorer.score(docID(), freq); } + @Override + public IntervalIterator intervals(String field) { + if (this.field.equals(field)) + return intervals; + return null; + } + @Override public DocIdSetIterator iterator() { return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); @@ -56,7 +65,7 @@ public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { - intervals.advanceTo(docID()); + intervals.reset(); return intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; } diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index db67f0909673..d8cb9a98e71f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -21,6 +21,7 @@ import java.util.List; import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.util.PriorityQueue; public final class Intervals { @@ -61,11 +62,6 @@ public TermIntervalIterator(PostingsEnum pe) { int upTo = -1; int pos = -1; - @Override - public DocIdSetIterator approximation() { - return pe; - } - @Override public int start() { return pos; @@ -82,14 +78,8 @@ public int innerWidth() { } @Override - public void advanceTo(int doc) throws IOException { - pos = -1; - if (pe.docID() == doc || (pe.docID() < doc && pe.advance(doc) == doc)) { - upTo = pe.freq(); - } - else { - 
upTo = -1; - } + public void reset() throws IOException { + upTo = pe.freq(); } @Override @@ -118,7 +108,6 @@ public static IntervalIterator orderedIntervalIterator(List su private static class OrderedIntervalIterator implements IntervalIterator { final List subIntervals; - final DocIdSetIterator approximation; int start; int end; @@ -127,12 +116,6 @@ private static class OrderedIntervalIterator implements IntervalIterator { private OrderedIntervalIterator(List subIntervals) { this.subIntervals = subIntervals; - this.approximation = ConjunctionDISI.intersectIntervals(subIntervals); - } - - @Override - public DocIdSetIterator approximation() { - return approximation; } @Override @@ -151,9 +134,9 @@ public int innerWidth() { } @Override - public void advanceTo(int doc) throws IOException { + public void reset() throws IOException { for (IntervalIterator it : subIntervals) { - it.advanceTo(doc); + it.reset(); } subIntervals.get(0).nextInterval(); i = 1; @@ -188,4 +171,82 @@ public int nextInterval() throws IOException { } } + public static IntervalIterator or(List subIterators) { + return new DisjunctionIntervalIterator(subIterators); + } + + private static class DisjunctionIntervalIterator implements IntervalIterator { + + private final PriorityQueue queue; + private final IntervalIterator[] subIterators; + + IntervalIterator current; + + DisjunctionIntervalIterator(List subIterators) { + this.queue = new PriorityQueue(subIterators.size()) { + @Override + protected boolean lessThan(IntervalIterator a, IntervalIterator b) { + return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); + } + }; + this.subIterators = new IntervalIterator[subIterators.size()]; + + for (int i = 0; i < subIterators.size(); i++) { + this.subIterators[i] = subIterators.get(i); + } + } + + @Override + public int start() { + return current.start(); + } + + @Override + public int end() { + return current.end(); + } + + @Override + public int innerWidth() { + return 
current.innerWidth(); + } + + @Override + public void reset() throws IOException { + queue.clear(); + for (int i = 0; i < subIterators.length; i++) { + subIterators[i].reset(); + subIterators[i].nextInterval(); + queue.add(subIterators[i]); + } + current = null; + } + + @Override + public int nextInterval() throws IOException { + if (current == null) { + current = queue.top(); + return current.start(); + } + int start = current.start(), end = current.end(); + while (queue.size() > 0 && contains(queue.top(), start, end)) { + IntervalIterator it = queue.pop(); + if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { + queue.add(it); + } + } + if (queue.size() == 0) { + current = IntervalIterator.EMPTY; + return NO_MORE_INTERVALS; + } + current = queue.top(); + return current.start(); + } + + private boolean contains(IntervalIterator it, int start, int end) { + return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); + } + + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java index f9b5bda6ce45..5b8cf917ba1d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java @@ -715,36 +715,31 @@ private boolean shouldCache(LeafReaderContext context) throws IOException { } @Override - public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { - return in.intervals(context, field); - } - - @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { if (used.compareAndSet(false, true)) { policy.onUse(getQuery()); } if (in.isCacheable(context) == false) { // this segment is not suitable for caching - return in.scorerSupplier(context); + return in.scorerSupplier(context, postings); } 
// Short-circuit: Check whether this segment is eligible for caching // before we take a lock because of #get if (shouldCache(context) == false) { - return in.scorerSupplier(context); + return in.scorerSupplier(context, postings); } // If the lock is already busy, prefer using the uncached version than waiting if (lock.tryLock() == false) { - return in.scorerSupplier(context); + return in.scorerSupplier(context, postings); } final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper(); if (cacheHelper == null) { // this reader has no cache helper - return in.scorerSupplier(context); + return in.scorerSupplier(context, postings); } DocIdSet docIdSet; try { @@ -754,7 +749,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti } if (docIdSet == null) { - ScorerSupplier inSupplier = in.scorerSupplier(context); + ScorerSupplier inSupplier = in.scorerSupplier(context, postings); if (inSupplier == null) { putIfAbsent(in.getQuery(), context, DocIdSet.EMPTY, cacheHelper); return null; @@ -814,8 +809,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context); + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context, postings); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java index 89b299734144..e878924a9f99 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java @@ -36,7 +36,7 @@ public String toString() { return "weight(" + MatchAllDocsQuery.this + ")"; } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, 
short postings) throws IOException { return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc())); } diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java index 525a18395434..d539c3d21453 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java @@ -54,7 +54,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java index 6ffbe340144e..98df563a61ba 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java @@ -126,6 +126,11 @@ public final Collection getChildren() throws IOException { return matchingChildren; } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit + } + @Override public DocIdSetIterator iterator() { return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 65d6631e9a7c..23eb49621252 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -236,7 +236,7 @@ public void extractTerms(Set terms) { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short pf) throws IOException 
{ assert termArrays.length != 0; final LeafReader reader = context.reader(); @@ -265,7 +265,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { TermState termState = termStates.get(term).get(context); if (termState != null) { termsEnum.seekExact(term.bytes(), termState); - postings.add(termsEnum.postings(null, PostingsEnum.POSITIONS)); + postings.add(termsEnum.postings(null, PostingsEnum.highest(pf, PostingsEnum.POSITIONS))); totalMatchCost += PhraseQuery.termPositionsCost(termsEnum); } } @@ -307,7 +307,7 @@ public boolean isCacheable(LeafReaderContext ctx) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context); + Scorer scorer = scorer(context, PostingsEnum.POSITIONS); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java index 3a46b96411cf..cfad13791b1f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java @@ -203,10 +203,10 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { final WeightOrDocIdSet weightOrBitSet = rewrite(context); if (weightOrBitSet.weight != null) { - return weightOrBitSet.weight.scorer(context); + return weightOrBitSet.weight.scorer(context, postings); } else { return scorer(weightOrBitSet.set); } diff --git a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java index 74218b40b0c3..9e639247aec8 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java @@ -65,7 +65,7 @@ public String toString(String field) { public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { FieldInfos fieldInfos = context.reader().getFieldInfos(); FieldInfo fieldInfo = fieldInfos.fieldInfo(field); if (fieldInfo == null || fieldInfo.hasNorms() == false) { diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index ff1538820d61..b58f1ed868aa 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -396,7 +396,7 @@ public void extractTerms(Set queryTerms) { public String toString() { return "weight(" + PhraseQuery.this + ")"; } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { assert terms.length > 0; final LeafReader reader = context.reader(); PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.length]; @@ -422,7 +422,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { return null; } te.seekExact(t.bytes(), state); - PostingsEnum postingsEnum = te.postings(null, PostingsEnum.POSITIONS); + PostingsEnum postingsEnum = te.postings(null, PostingsEnum.highest(postings, PostingsEnum.POSITIONS)); postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t); totalMatchCost += termPositionsCost(te); } @@ -455,7 +455,7 @@ private boolean termNotInReader(LeafReader reader, Term term) throws IOException @Override 
public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context); + Scorer scorer = scorer(context, PostingsEnum.POSITIONS); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java index 689d64a50d74..325e06060336 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java @@ -114,7 +114,7 @@ public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, fl return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { LeafReader reader = context.reader(); PointValues values = reader.getPointValues(field); diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 7e48383b4720..897eb5d989ea 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -224,7 +224,7 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { LeafReader reader = context.reader(); PointValues values = reader.getPointValues(field); @@ -314,8 +314,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context); + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + 
ScorerSupplier scorerSupplier = scorerSupplier(context, postings); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java index 6b19f295a7d8..054a23dad595 100644 --- a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java @@ -23,6 +23,7 @@ import java.util.List; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; /** A {@link Rescorer} that uses a provided Query to assign * scores to the first-pass hits. @@ -82,7 +83,7 @@ public int compare(ScoreDoc a, ScoreDoc b) { if (readerContext != null) { // We advanced to another segment: docBase = readerContext.docBase; - scorer = weight.scorer(readerContext); + scorer = weight.scorer(readerContext, PostingsEnum.FREQS); } if (scorer != null) { diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java index 987293eb0476..aa9108512190 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java @@ -61,6 +61,11 @@ private static boolean matchesOrNull(TwoPhaseIterator it) throws IOException { return it == null || it.matches(); } + @Override + public IntervalIterator intervals(String field) { + return reqScorer.intervals(field); + } + @Override public DocIdSetIterator iterator() { return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java index 6d93a54560d4..d8be205f37de 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java @@ -184,6 +184,17 @@ public 
DocIdSetIterator iterator() { } } + @Override + public IntervalIterator intervals(String field) { + IntervalIterator reqIntervals = reqScorer.intervals(field); + IntervalIterator optIntervals = optScorer.intervals(field); + if (optIntervals == null) + return reqIntervals; + if (reqIntervals == null) + return optIntervals; + return Intervals.or(Arrays.asList(reqIntervals, optIntervals)); + } + @Override public int docID() { return reqScorer.docID(); diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java index f1da1fb50cf6..3c05fb194281 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java @@ -125,6 +125,8 @@ public ChildScorer(Scorer child, String relationship) { */ public abstract DocIdSetIterator iterator(); + public abstract IntervalIterator intervals(String field); + /** * Optional method: Return a {@link TwoPhaseIterator} view of this * {@link Scorer}. A return value of {@code null} indicates that diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index 7587b37889b7..20a375955410 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -563,6 +563,11 @@ public float getMaxScore(int upTo) throws IOException { @Override public String toString() { return "scorer(" + weight + ")"; } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit. 
this will be fun + } + @Override public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(conjunction) { diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java index 2a7c450805d9..9fe7dbbbecf5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java @@ -29,6 +29,7 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; @@ -161,7 +162,7 @@ public void extractTerms(Set terms) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context); + Scorer scorer = scorer(context, PostingsEnum.FREQS); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { @@ -187,7 +188,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { IndexOptions indexOptions = IndexOptions.NONE; if (terms.length > 0) { FieldInfo info = context.reader() @@ -208,7 +209,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { long termMaxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); totalMaxFreq += termMaxFreq; LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, termMaxFreq); - subScorers.add(new TermScorer(this, termsEnum, ScoreMode.COMPLETE, simScorer)); + subScorers.add(new TermScorer(this, terms[i].field(), termsEnum, ScoreMode.COMPLETE, postings, simScorer)); } } if 
(subScorers.isEmpty()) { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java index a8bf5b0679c1..941e39227ca5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java @@ -309,12 +309,12 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { final WeightOrDocIdSet weightOrBitSet = rewrite(context); if (weightOrBitSet == null) { return null; } else if (weightOrBitSet.weight != null) { - return weightOrBitSet.weight.scorer(context); + return weightOrBitSet.weight.scorer(context, postings); } else { return scorer(weightOrBitSet.set); } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index da06023b7c30..d6e0386a2540 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -87,21 +87,7 @@ public String toString() { } @Override - public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { - if (term.field().equals(field) == false) { - return null; - } - assert termStates == null || termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);; - final TermsEnum termsEnum = getTermsEnum(context); - if (termsEnum == null) { - return null; - } - PostingsEnum pe = termsEnum.postings(null, PostingsEnum.POSITIONS); - return Intervals.termIterator(pe); - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer 
scorer(LeafReaderContext context, short postings) throws IOException { assert termStates == null || termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);; final TermsEnum termsEnum = getTermsEnum(context); if (termsEnum == null) { @@ -113,7 +99,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { .getIndexOptions(); float maxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), maxFreq); - return new TermScorer(this, termsEnum, scoreMode, scorer); + return new TermScorer(this, getTerm().field(), termsEnum, scoreMode, postings, scorer); } private long getMaxFreq(IndexOptions indexOptions, long ttf, long df) { @@ -159,7 +145,7 @@ private boolean termNotInReader(LeafReader reader, Term term) throws IOException @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - TermScorer scorer = (TermScorer) scorer(context); + TermScorer scorer = (TermScorer) scorer(context, PostingsEnum.FREQS); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 1d9d5e670ce2..7162d481444e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -33,6 +33,7 @@ final class TermScorer extends Scorer { private final DocIdSetIterator iterator; private final LeafSimScorer docScorer; private float minCompetitiveScore; + private final String field; /** * Construct a TermScorer. @@ -44,11 +45,12 @@ final class TermScorer extends Scorer { * @param docScorer * A {@link LeafSimScorer} for the appropriate field. 
*/ - TermScorer(Weight weight, TermsEnum te, ScoreMode scoreMode, LeafSimScorer docScorer) throws IOException { + TermScorer(Weight weight, String field, TermsEnum te, ScoreMode scoreMode, short postings, LeafSimScorer docScorer) throws IOException { super(weight); this.docScorer = docScorer; + this.field = field; if (scoreMode == ScoreMode.TOP_SCORES) { - impactsEnum = te.impacts(docScorer.getSimScorer(), PostingsEnum.FREQS); + impactsEnum = te.impacts(docScorer.getSimScorer(), PostingsEnum.highest(postings, PostingsEnum.FREQS)); postingsEnum = impactsEnum; iterator = new DocIdSetIterator() { @@ -105,7 +107,8 @@ public long cost() { } }; } else { - postingsEnum = te.postings(null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE); + short pf = PostingsEnum.highest(scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE, postings); + postingsEnum = te.postings(null, pf); impactsEnum = new SlowImpactsEnum(postingsEnum, docScorer.getSimScorer().score(Float.MAX_VALUE, 1)); iterator = postingsEnum; } @@ -125,6 +128,14 @@ public DocIdSetIterator iterator() { return iterator; } + @Override + public IntervalIterator intervals(String field) { + if (this.field.equals(field)) { + return Intervals.termIterator(postingsEnum); + } + return null; + } + @Override public float score() throws IOException { assert docID() != DocIdSetIterator.NO_MORE_DOCS; diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java index f7a88f15927e..f39380b5cd88 100644 --- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java @@ -440,6 +440,11 @@ private void advanceAllTail() throws IOException { assert ensureConsistent(); } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit + } + @Override public float score() throws IOException { // we need to know about all matches diff --git 
a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index b98a17f9e02b..98788582ae60 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -23,6 +23,7 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.util.Bits; @@ -36,7 +37,7 @@ * {@link org.apache.lucene.index.LeafReader} dependent state should reside in the {@link Scorer}. *

* Since {@link Weight} creates {@link Scorer} instances for a given - * {@link org.apache.lucene.index.LeafReaderContext} ({@link #scorer(org.apache.lucene.index.LeafReaderContext)}) + * {@link org.apache.lucene.index.LeafReaderContext} ({@link #scorer(org.apache.lucene.index.LeafReaderContext,short)}) * callers must maintain the relationship between the searcher's top-level * {@link IndexReaderContext} and the context used to create a {@link Scorer}. *

@@ -45,7 +46,7 @@ *

  • A Weight is constructed by a top-level query, given a * IndexSearcher ({@link Query#createWeight(IndexSearcher, ScoreMode, float)}). *
  • A Scorer is constructed by - * {@link #scorer(org.apache.lucene.index.LeafReaderContext)}. + * {@link #scorer(org.apache.lucene.index.LeafReaderContext,short)}. * * * @since 2.9 @@ -100,11 +101,7 @@ public final Query getQuery() { * @return a {@link Scorer} which scores documents in/out-of order. * @throws IOException if there is a low-level I/O error */ - public abstract Scorer scorer(LeafReaderContext context) throws IOException; - - public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { - return null; - } + public abstract Scorer scorer(LeafReaderContext context, short postings) throws IOException; /** * Optional method. @@ -113,8 +110,8 @@ public IntervalIterator intervals(LeafReaderContext context, String field) throw * builds a {@link ScorerSupplier} wrapper around it. * @see #scorer */ - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { - final Scorer scorer = scorer(context); + public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { + final Scorer scorer = scorer(context, postings); if (scorer == null) { return null; } @@ -148,7 +145,7 @@ public long cost() { */ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { - Scorer scorer = scorer(context); + Scorer scorer = scorer(context, PostingsEnum.NONE); if (scorer == null) { // No docs match return null; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java index 666f163742a3..5fe3fd8dbbf8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -21,6 +21,7 @@ import java.util.Objects; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IntervalIterator; import org.apache.lucene.search.LeafSimScorer; import 
org.apache.lucene.search.Scorer; import org.apache.lucene.search.TwoPhaseIterator; @@ -57,6 +58,11 @@ public int docID() { return spans.docID(); } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit + } + @Override public DocIdSetIterator iterator() { return spans; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index 25b58fdc39a0..d24887f3eb8c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -124,7 +124,7 @@ private Similarity.SimScorer buildSimWeight(SpanQuery query, IndexSearcher searc public abstract Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) throws IOException; @Override - public SpanScorer scorer(LeafReaderContext context) throws IOException { + public SpanScorer scorer(LeafReaderContext context, short postings) throws IOException { final Spans spans = getSpans(context, Postings.POSITIONS); if (spans == null) { return null; @@ -145,7 +145,7 @@ public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - SpanScorer scorer = scorer(context); + SpanScorer scorer = scorer(context, PostingsEnum.POSITIONS); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index bc85bbc58cb7..77029ffffc4b 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -40,7 +40,7 @@ public class TestIntervalQuery extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - 
RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); + RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); doc.add(newTextField(field, docFields[i], Field.Store.YES)); @@ -74,19 +74,19 @@ private void checkHits(Query query, int[] results) throws IOException { public void testOrderedNearQueryWidth0() throws IOException { checkHits(IntervalQuery.orderedNearQuery(field, 0, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), - new int[]{ 0 }); + new int[]{0}); } public void testOrderedNearQueryWidth1() throws IOException { checkHits(IntervalQuery.orderedNearQuery(field, 1, new TermQuery(new Term(field, "w1")), - new TermQuery(new Term(field, "w2"))), - new int[]{ 0, 1, 2, 5 }); + new TermQuery(new Term(field, "w2"))), + new int[]{0, 1, 2, 5}); } public void testOrderedNearQueryWidth2() throws IOException { checkHits(IntervalQuery.orderedNearQuery(field, 2, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), - new int[]{ 0, 1, 2, 3, 5 }); + new int[]{0, 1, 2, 3, 5}); } public void testNestedOrderedNearQuery() throws IOException { @@ -98,7 +98,7 @@ public void testNestedOrderedNearQuery() throws IOException { new TermQuery(new Term(field, "w3"))) ); - checkHits(q, new int[]{ 0, 1, 2 }); + checkHits(q, new int[]{0, 1, 2}); } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index b38e27bba222..fb3369de885a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -30,6 +30,7 @@ import org.apache.lucene.index.DocValues; import 
org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; @@ -82,71 +83,68 @@ public static void teardownIndex() throws IOException { IOUtils.close(searcher.getIndexReader(), directory); } - public void testTermQueryIntervals() throws IOException { - int[][] expected = new int[][]{ - {}, - { 1, 4, 7 }, - { 1, 4, 7 }, - {}, - { 1, 4, 7 }, - { 0 } - }; - - Weight weight = searcher.createNormalizedWeight(new TermQuery(new Term("field1", "porridge")), ScoreMode.COMPLETE); + private void checkIntervals(Query query, String field, int[][] expected) throws IOException { + Weight weight = searcher.createNormalizedWeight(query, ScoreMode.COMPLETE); for (LeafReaderContext ctx : searcher.leafContexts) { - assertNull(weight.intervals(ctx, "field2")); + Scorer scorer = weight.scorer(ctx, PostingsEnum.POSITIONS); + assertNull(scorer.intervals(field + "1")); NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); - IntervalIterator intervals = weight.intervals(ctx, "field1"); - for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { + IntervalIterator intervals = scorer.intervals("field1"); + DocIdSetIterator it = scorer.iterator(); + int matchedDocs = 0; + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { + matchedDocs++; ids.advance(doc); int id = (int) ids.longValue(); - intervals.advanceTo(doc); + intervals.reset(); int i = 0, pos; while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); - assertEquals(expected[id][i], intervals.end()); - i++; + assertEquals(expected[id][i + 1], intervals.end()); + i += 2; } assertEquals(expected[id].length, i); } + assertEquals(expected.length, matchedDocs); } + } + public void 
testTermQueryIntervals() throws IOException { + checkIntervals(new TermQuery(new Term("field1", "porridge")), "field1", new int[][]{ + {}, + { 1, 1, 4, 4, 7, 7 }, + { 1, 1, 4, 4, 7, 7 }, + {}, + { 1, 1, 4, 4, 7, 7 }, + { 0 } + }); } public void testOrderedNearIntervals() throws IOException { - - int[][] expected = new int[][]{ + checkIntervals(IntervalQuery.orderedNearQuery("field1", 100, + new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), + "field1", new int[][]{ {}, { 0, 2, 6, 17 }, { 3, 5, 6, 21 }, {}, { 0, 2, 6, 17 }, { } - }; - - Weight peaseWeight = searcher.createNormalizedWeight(new TermQuery(new Term("field1", "pease")), ScoreMode.COMPLETE); - Weight hotWeight = searcher.createNormalizedWeight(new TermQuery(new Term("field1", "hot")), ScoreMode.COMPLETE); - for (LeafReaderContext ctx : searcher.leafContexts) { - NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); - IntervalIterator intervals = Intervals.orderedIntervalIterator( - Arrays.asList(peaseWeight.intervals(ctx, "field1"), hotWeight.intervals(ctx, "field1")) - ); - for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { - ids.advance(doc); - int id = (int) ids.longValue(); - intervals.advanceTo(doc); - int i = 0, pos; - while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { - assertEquals(expected[id][i], pos); - assertEquals(expected[id][i], intervals.start()); - assertEquals(expected[id][i + 1], intervals.end()); - i += 2; - } - assertEquals(expected[id].length, i); - } - } - + }); } + public void testIntervalDisjunction() throws IOException { + checkIntervals(new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("field1", "hot")), BooleanClause.Occur.SHOULD) + .build(), "field1", new int[][]{ + {}, + { 0, 0, 2, 2, 3, 3, 6, 6, 17, 17}, + { 0, 0, 3, 3, 5, 5, 6, 6, 21, 21}, + { 3, 3 }, + { 0, 0, 2, 2, 3, 3, 6, 6, 17, 17}, + {} + }); + } } diff --git 
a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java index 80cd4da7cf0d..e6eeae907da8 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java @@ -112,6 +112,11 @@ public Collection getChildren() { return Collections.singletonList(new ChildScorer(in, "SHOULD")); } + @Override + public IntervalIterator intervals(String field) { + return in.intervals(field); + } + @Override public int docID() { return in.docID(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java index 9206b0484d4d..a8ef239d93cb 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java @@ -115,4 +115,9 @@ public long cost() { }; } + @Override + public IntervalIterator intervals(String field) { + return null; + } + } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java index fa113113f81a..b4e25a37d649 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java @@ -32,6 +32,7 @@ import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; @@ -311,7 +312,7 @@ public void collect(int doc) throws IOException { if (scorer == null) { Weight w = 
s.createNormalizedWeight(q, ScoreMode.COMPLETE); LeafReaderContext context = readerContextArray.get(leafPtr); - scorer = w.scorer(context); + scorer = w.scorer(context, PostingsEnum.FREQS); iterator = scorer.iterator(); } @@ -376,7 +377,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); LeafReaderContext ctx = (LeafReaderContext)indexSearcher.getTopReaderContext(); - Scorer scorer = w.scorer(ctx); + Scorer scorer = w.scorer(ctx, PostingsEnum.NONE); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -406,7 +407,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); LeafReaderContext ctx = previousReader.getContext(); - Scorer scorer = w.scorer(ctx); + Scorer scorer = w.scorer(ctx, PostingsEnum.NONE); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -444,7 +445,7 @@ public void collect(int doc) throws IOException { long startMS = System.currentTimeMillis(); for (int i=lastDoc[0]+1; i<=doc; i++) { Weight w = s.createNormalizedWeight(q, ScoreMode.COMPLETE); - Scorer scorer = w.scorer(context.get(leafPtr)); + Scorer scorer = w.scorer(context.get(leafPtr), PostingsEnum.FREQS); Assert.assertTrue("query collected "+doc+" but advance("+i+") says no more docs!",scorer.iterator().advance(i) != DocIdSetIterator.NO_MORE_DOCS); Assert.assertEquals("query collected "+doc+" but advance("+i+") got to "+scorer.docID(),doc,scorer.docID()); float advanceScore = scorer.score(); @@ -477,7 +478,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false); indexSearcher.setSimilarity(s.getSimilarity()); 
Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); - Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext()); + Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext(), PostingsEnum.NONE); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -505,7 +506,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false); indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); - Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext()); + Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext(), PostingsEnum.NONE); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -525,7 +526,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { public static void checkBulkScorerSkipTo(Random r, Query query, IndexSearcher searcher) throws IOException { Weight weight = searcher.createNormalizedWeight(query, ScoreMode.COMPLETE); for (LeafReaderContext context : searcher.getIndexReader().leaves()) { - final Scorer scorer = weight.scorer(context); + final Scorer scorer = weight.scorer(context, PostingsEnum.FREQS); final BulkScorer bulkScorer = weight.bulkScorer(context); if (scorer == null && bulkScorer == null) { continue; diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java b/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java index ae699130190d..dd2fd884f076 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java @@ -22,6 +22,7 @@ import org.apache.lucene.index.IndexReader; import 
org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.util.Bits; /** @@ -48,7 +49,7 @@ protected void search(List leaves, Weight weight, Collector c // we force the use of Scorer (not BulkScorer) to make sure // that the scorer passed to LeafCollector.setScorer supports // Scorer.getChildren - Scorer scorer = weight.scorer(ctx); + Scorer scorer = weight.scorer(ctx, collector.scoreMode().needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE); if (scorer != null) { final DocIdSetIterator iterator = scorer.iterator(); final LeafCollector leafCollector = collector.getLeafCollector(ctx); diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java index be04e0092092..6c94dee59bf9 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java @@ -68,8 +68,8 @@ public void extractTerms(Set terms) { } @Override - public SpanScorer scorer(LeafReaderContext context) throws IOException { - return in.scorer(context); + public SpanScorer scorer(LeafReaderContext context, short postings) throws IOException { + return in.scorer(context, postings); } @Override From 6938bbc7178ef542f443ba52309834cd9814ab14 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 21 Feb 2018 14:09:02 +0000 Subject: [PATCH 03/83] Move intervals() back to Scorer Having it on Weight means duplicating loads of Scorer implementations to ensure that we always return the correct positions --- .../lucene/document/RangeFieldQuery.java | 8 +- .../SortedNumericDocValuesRangeQuery.java | 4 +- .../SortedSetDocValuesRangeQuery.java | 4 +- .../lucene/index/FrozenBufferedUpdates.java | 2 +- .../apache/lucene/search/BooleanQuery.java | 4 +- .../apache/lucene/search/BooleanWeight.java | 20 ++-- 
.../org/apache/lucene/search/BoostQuery.java | 4 +- .../lucene/search/ConstantScoreQuery.java | 12 +-- .../lucene/search/ConstantScoreScorer.java | 2 +- .../lucene/search/ConstantScoreWeight.java | 2 +- .../search/DisjunctionIntervalIterator.java | 102 ++++++++++++++++++ .../lucene/search/DisjunctionMaxQuery.java | 12 +-- .../lucene/search/DisjunctionScorer.java | 15 ++- .../search/DocValuesFieldExistsQuery.java | 4 +- .../lucene/search/DocValuesRewriteMethod.java | 4 +- .../lucene/search/DoubleValuesSource.java | 6 +- .../apache/lucene/search/FilterWeight.java | 6 +- .../lucene/search/IndexOrDocValuesQuery.java | 16 +-- .../apache/lucene/search/IndexSearcher.java | 6 +- .../apache/lucene/search/IntervalFilter.java | 4 +- .../lucene/search/IntervalIterator.java | 8 +- .../apache/lucene/search/IntervalQuery.java | 17 +-- .../apache/lucene/search/IntervalScorer.java | 38 ++++++- .../org/apache/lucene/search/Intervals.java | 95 +++------------- .../apache/lucene/search/LRUQueryCache.java | 16 +-- .../lucene/search/MatchAllDocsQuery.java | 4 +- .../lucene/search/MatchNoDocsQuery.java | 4 +- .../lucene/search/MultiPhraseQuery.java | 14 +-- .../MultiTermQueryConstantScoreWrapper.java | 8 +- .../lucene/search/NormsFieldExistsQuery.java | 4 +- .../org/apache/lucene/search/PhraseQuery.java | 14 +-- .../apache/lucene/search/PointInSetQuery.java | 4 +- .../apache/lucene/search/PointRangeQuery.java | 8 +- .../java/org/apache/lucene/search/Query.java | 49 ++++++++- .../apache/lucene/search/QueryRescorer.java | 2 +- .../apache/lucene/search/ReqOptSumScorer.java | 22 ++-- .../apache/lucene/search/SynonymQuery.java | 16 +-- .../apache/lucene/search/TermInSetQuery.java | 8 +- .../org/apache/lucene/search/TermQuery.java | 16 +-- .../org/apache/lucene/search/TermScorer.java | 6 +- .../java/org/apache/lucene/search/Weight.java | 14 +-- .../search/spans/FieldMaskingSpanQuery.java | 4 +- .../lucene/search/spans/SpanBoostQuery.java | 4 +- .../search/spans/SpanContainingQuery.java | 6 +- 
.../spans/SpanMultiTermQueryWrapper.java | 2 +- .../lucene/search/spans/SpanNearQuery.java | 6 +- .../lucene/search/spans/SpanNotQuery.java | 6 +- .../lucene/search/spans/SpanOrQuery.java | 4 +- .../search/spans/SpanPositionCheckQuery.java | 4 +- .../apache/lucene/search/spans/SpanQuery.java | 2 +- .../lucene/search/spans/SpanTermQuery.java | 2 +- .../lucene/search/spans/SpanWeight.java | 4 +- .../lucene/search/spans/SpanWithinQuery.java | 6 +- .../lucene/search/JustCompileSearch.java | 5 + .../search/TestBoolean2ScorerSupplier.java | 5 + .../lucene/search/TestBooleanScorer.java | 2 +- .../lucene/search/TestCachingCollector.java | 5 + .../lucene/search/TestConjunctionDISI.java | 5 + .../lucene/search/TestConstantScoreQuery.java | 4 +- .../apache/lucene/search/TestIntervals.java | 45 ++++---- .../lucene/search/TestLRUQueryCache.java | 10 +- .../lucene/search/TestMinShouldMatch2.java | 5 + .../apache/lucene/search/TestNeedsScores.java | 4 +- .../lucene/search/TestPositionIncrement.java | 4 +- .../TestPositiveScoresOnlyCollector.java | 7 +- .../lucene/search/TestQueryRescorer.java | 7 +- .../TestScoreCachingWrappingScorer.java | 7 +- .../apache/lucene/search/TestScorerPerf.java | 2 +- .../apache/lucene/search/TestSortRandom.java | 2 +- .../lucene/search/TestTopDocsCollector.java | 5 + .../lucene/search/TestTopFieldCollector.java | 7 +- .../TestUsageTrackingFilterCachingPolicy.java | 2 +- .../apache/lucene/search/TestWANDScorer.java | 4 +- .../search/spans/JustCompileSearchSpans.java | 2 +- .../spans/TestFieldMaskingSpanQuery.java | 14 +-- .../search/spans/TestNearSpansOrdered.java | 27 ++--- .../search/spans/TestSpanCollection.java | 7 +- .../search/spans/TestSpanContainQuery.java | 2 +- .../apache/lucene/search/spans/TestSpans.java | 10 +- .../lucene/search/AssertingIndexSearcher.java | 4 +- .../apache/lucene/search/AssertingQuery.java | 4 +- .../lucene/search/BlockScoreQueryWrapper.java | 8 +- .../org/apache/lucene/search/QueryUtils.java | 14 +-- 
.../search/RandomApproximationQuery.java | 9 +- .../lucene/search/ScorerIndexSearcher.java | 2 +- .../search/spans/AssertingSpanQuery.java | 4 +- .../search/spans/AssertingSpanWeight.java | 4 +- .../search/TestBaseExplanationTestCase.java | 4 +- 88 files changed, 560 insertions(+), 346 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java diff --git a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java index d507da963f4c..9b32a1506734 100644 --- a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java @@ -262,7 +262,7 @@ private void checkFieldInfo(FieldInfo fieldInfo) { } @Override - public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) { @@ -290,7 +290,7 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { LeafReader reader = context.reader(); PointValues values = reader.getPointValues(field); if (values == null) { @@ -350,8 +350,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context, postings); + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } diff --git 
a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java index 0c0efdf56660..03a7fa897339 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java @@ -93,7 +93,7 @@ public Query rewrite(IndexReader reader) throws IOException { abstract SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException; @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override @@ -102,7 +102,7 @@ public boolean isCacheable(LeafReaderContext ctx) { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { SortedNumericDocValues values = getValues(context.reader(), field); if (values == null) { return null; diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java index cd6cfadac7f6..69f542aa2e48 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java @@ -104,10 +104,10 @@ public Query rewrite(IndexReader reader) throws IOException { abstract SortedSetDocValues getValues(LeafReader reader, String field) throws IOException; @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, 
Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { SortedSetDocValues values = getValues(context.reader(), field); if (values == null) { return null; diff --git a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java index f25d7699bb88..202bf2cc49af 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java @@ -686,7 +686,7 @@ private long applyQueryDeletes(BufferedUpdatesStream.SegmentState[] segStates) t final IndexSearcher searcher = new IndexSearcher(readerContext.reader()); searcher.setQueryCache(null); final Weight weight = searcher.createNormalizedWeight(query, ScoreMode.COMPLETE_NO_SCORES); - final Scorer scorer = weight.scorer(readerContext, PostingsEnum.NONE); + final Scorer scorer = weight.scorer(readerContext); if (scorer != null) { final DocIdSetIterator it = scorer.iterator(); diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index f52df9fb9cd8..f974dc04559b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -199,12 +199,12 @@ private BooleanQuery rewriteNoScoring() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { BooleanQuery query = this; if (scoreMode.needsScores() == false) { query = rewriteNoScoring(); } - return new 
BooleanWeight(query, searcher, scoreMode, boost); + return new BooleanWeight(query, searcher, scoreMode, minRequiredPostings, boost); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java index ea1ffed6594a..8de9394e2142 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java @@ -45,14 +45,14 @@ final class BooleanWeight extends Weight { final ArrayList weights; final ScoreMode scoreMode; - BooleanWeight(BooleanQuery query, IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + BooleanWeight(BooleanQuery query, IndexSearcher searcher, ScoreMode scoreMode, Query.Postings minRequiredPostings, float boost) throws IOException { super(query); this.query = query; this.scoreMode = scoreMode; this.similarity = searcher.getSimilarity(); weights = new ArrayList<>(); for (BooleanClause c : query) { - Weight w = searcher.createWeight(c.getQuery(), c.isScoring() ? scoreMode : ScoreMode.COMPLETE_NO_SCORES, boost); + Weight w = searcher.createWeight(c.getQuery(), c.isScoring() ? scoreMode : ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, boost); weights.add(w); } } @@ -113,7 +113,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio // contributions to the score to floats), so in order to make sure that // explanations have the same value as the score, we pull a scorer and // use it to compute the score. - Scorer scorer = scorer(context, PostingsEnum.NONE); + Scorer scorer = scorer(context); int advanced = scorer.iterator().advance(doc); assert advanced == doc; return Explanation.match(scorer.score(), "sum of:", subs); @@ -211,7 +211,7 @@ private BulkScorer requiredBulkScorer(LeafReaderContext context) throws IOExcept /** Try to build a boolean scorer for this weight. Returns null if {@link BooleanScorer} * cannot be used. 
*/ - BulkScorer booleanScorer(LeafReaderContext context, short postings) throws IOException { + BulkScorer booleanScorer(LeafReaderContext context) throws IOException { final int numOptionalClauses = query.getClauses(Occur.SHOULD).size(); final int numRequiredClauses = query.getClauses(Occur.MUST).size() + query.getClauses(Occur.FILTER).size(); @@ -263,7 +263,7 @@ BulkScorer booleanScorer(LeafReaderContext context, short postings) throws IOExc for (Weight w : weights) { BooleanClause c = cIter.next(); if (c.isProhibited()) { - Scorer scorer = w.scorer(context, postings); + Scorer scorer = w.scorer(context); if (scorer != null) { prohibited.add(scorer); } @@ -291,7 +291,7 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { // so that we can dynamically prune non-competitive hits. return super.bulkScorer(context); } - final BulkScorer bulkScorer = booleanScorer(context, PostingsEnum.NONE); + final BulkScorer bulkScorer = booleanScorer(context); if (bulkScorer != null) { // bulk scoring is applicable, use it return bulkScorer; @@ -302,8 +302,8 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context, postings); + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } @@ -326,7 +326,7 @@ public boolean isCacheable(LeafReaderContext ctx) { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { int minShouldMatch = query.getMinimumNumberShouldMatch(); final Map> scorers = new EnumMap<>(Occur.class); @@ -337,7 +337,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) Iterator 
cIter = query.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); - ScorerSupplier subScorer = w.scorerSupplier(context, postings); + ScorerSupplier subScorer = w.scorerSupplier(context); if (subScorer == null) { if (c.isRequired()) { return null; diff --git a/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java b/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java index 4e4649cb7100..860368240f77 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java @@ -116,8 +116,8 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return query.createWeight(searcher, scoreMode, BoostQuery.this.boost * boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return query.createWeight(searcher, scoreMode, minRequiredPostings, BoostQuery.this.boost * boost); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java index abdb85953dd8..269328720540 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -110,8 +110,8 @@ public long cost() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - final Weight innerWeight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 1f); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + final Weight innerWeight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, 1f); if (scoreMode.needsScores()) { return new 
ConstantScoreWeight(this, boost) { @@ -125,8 +125,8 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { - ScorerSupplier innerScorerSupplier = innerWeight.scorerSupplier(context, postings); + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + ScorerSupplier innerScorerSupplier = innerWeight.scorerSupplier(context); if (innerScorerSupplier == null) { return null; } @@ -159,8 +159,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context, postings); + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java index 56adcf390553..0040374b6147 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java @@ -65,7 +65,7 @@ public DocIdSetIterator iterator() { @Override public IntervalIterator intervals(String field) { - return null; + throw new UnsupportedOperationException(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java index 57316b9f7e4a..cdf4be94f3c8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java @@ -54,7 +54,7 @@ protected final float score() { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - final Scorer s = scorer(context, 
PostingsEnum.NONE); + final Scorer s = scorer(context); final boolean exists; if (s == null) { exists = false; diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java new file mode 100644 index 000000000000..57af8a28252b --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.util.PriorityQueue; + +abstract class DisjunctionIntervalIterator implements IntervalIterator { + + private final PriorityQueue queue; + private final IntervalIterator[] subIterators; + + IntervalIterator current; + + DisjunctionIntervalIterator(List subIterators) { + this.queue = new PriorityQueue(subIterators.size()) { + @Override + protected boolean lessThan(IntervalIterator a, IntervalIterator b) { + return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); + } + }; + this.subIterators = new IntervalIterator[subIterators.size()]; + + for (int i = 0; i < subIterators.size(); i++) { + this.subIterators[i] = subIterators.get(i); + } + } + + @Override + public int start() { + return current.start(); + } + + @Override + public int end() { + return current.end(); + } + + @Override + public int innerWidth() { + return current.innerWidth(); + } + + protected abstract void positionSubIntervals() throws IOException; + + @Override + public boolean reset(int doc) throws IOException { + positionSubIntervals(); + queue.clear(); + for (int i = 0; i < subIterators.length; i++) { + if (subIterators[i].reset(doc)) { + subIterators[i].nextInterval(); + queue.add(subIterators[i]); + } + } + current = null; + return queue.size() > 0; + } + + @Override + public int nextInterval() throws IOException { + if (current == null) { + current = queue.top(); + return current.start(); + } + int start = current.start(), end = current.end(); + while (queue.size() > 0 && contains(queue.top(), start, end)) { + IntervalIterator it = queue.pop(); + if (it != null && it.nextInterval() != Intervals.NO_MORE_INTERVALS) { + queue.add(it); + } + } + if (queue.size() == 0) { + current = IntervalIterator.EMPTY; + return Intervals.NO_MORE_INTERVALS; + } + current = queue.top(); + return current.start(); + } + + private boolean contains(IntervalIterator it, int start, 
int end) { + return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index 552dff1283a5..f79d2b9cfcda 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -103,10 +103,10 @@ protected class DisjunctionMaxWeight extends Weight { private final ScoreMode scoreMode; /** Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */ - public DisjunctionMaxWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public DisjunctionMaxWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { super(DisjunctionMaxQuery.this); for (Query disjunctQuery : disjuncts) { - weights.add(searcher.createWeight(disjunctQuery, scoreMode, boost)); + weights.add(searcher.createWeight(disjunctQuery, scoreMode, minRequiredPostings, boost)); } this.scoreMode = scoreMode; } @@ -120,11 +120,11 @@ public void extractTerms(Set terms) { /** Create the scorer used to score our associated DisjunctionMaxQuery */ @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { List scorers = new ArrayList<>(); for (Weight w : weights) { // we will advance() subscorers - Scorer subScorer = w.scorer(context, postings); + Scorer subScorer = w.scorer(context); if (subScorer != null) { scorers.add(subScorer); } @@ -189,8 +189,8 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio /** Create the Weight used to score us */ @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return 
new DisjunctionMaxWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return new DisjunctionMaxWeight(searcher, scoreMode, minRequiredPostings, boost); } /** Optimize our representation and our subqueries representations diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index f69fd936f453..9e5ab2813fd6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -182,7 +182,20 @@ public final float score() throws IOException { @Override public IntervalIterator intervals(String field) { - return null; // nocommit + List subIntervals = new ArrayList<>(); + for (DisiWrapper dw : subScorers) { + IntervalIterator subIt = dw.scorer.intervals(field); + if (subIt != null) + subIntervals.add(subIt); + } + if (subIntervals.size() == 0) + return null; + return new DisjunctionIntervalIterator(subIntervals) { + @Override + protected void positionSubIntervals() throws IOException { + getSubMatches(); + } + }; } /** Compute the score for the given linked list of scorers. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java index bbf9c3ced9a3..23fbaecbd981 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java @@ -62,10 +62,10 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { FieldInfos fieldInfos = context.reader().getFieldInfos(); FieldInfo fieldInfo = fieldInfos.fieldInfo(field); if (fieldInfo == null) { diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java index a141cbe64a72..6f4408599e5c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java @@ -72,10 +72,10 @@ public final int hashCode() { public final String getField() { return query.getField(); } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { final 
SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field); TermsEnum termsEnum = query.getTermsEnum(new Terms() { diff --git a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java index d951fc45db83..48579ccbee43 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java @@ -82,7 +82,7 @@ public Explanation explain(LeafReaderContext ctx, int docId, Explanation scoreEx * IndexReader-independent implementations can just return {@code this} * * Queries that use DoubleValuesSource objects should call rewrite() during - * {@link Query#createWeight(IndexSearcher, ScoreMode, float)} rather than during + * {@link Query#createWeight(IndexSearcher, ScoreMode, org.apache.lucene.search.Query.Postings, float)} rather than during * {@link Query#rewrite(IndexReader)} to avoid IndexReader reference leakage */ public abstract DoubleValuesSource rewrite(IndexSearcher reader) throws IOException; @@ -554,7 +554,7 @@ public boolean needsScores() { @Override public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException { - return new WeightDoubleValuesSource(searcher.rewrite(query).createWeight(searcher, ScoreMode.COMPLETE, 1f)); + return new WeightDoubleValuesSource(searcher.rewrite(query).createWeight(searcher, ScoreMode.COMPLETE, Query.Postings.NONE, 1f)); } @Override @@ -578,7 +578,7 @@ private WeightDoubleValuesSource(Weight weight) { @Override public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { - Scorer scorer = weight.scorer(ctx, PostingsEnum.NONE); + Scorer scorer = weight.scorer(ctx); if (scorer == null) return DoubleValues.EMPTY; DocIdSetIterator it = scorer.iterator(); diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java 
index 3ac351029097..278ad987a225 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java @@ -47,7 +47,7 @@ protected FilterWeight(Weight weight) { /** * Alternative constructor. * Use this variant only if the weight was not obtained - * via the {@link Query#createWeight(IndexSearcher, ScoreMode, float)} + * via the {@link Query#createWeight(IndexSearcher, ScoreMode, Query.Postings, float)} * method of the query object. */ protected FilterWeight(Query query, Weight weight) { @@ -71,8 +71,8 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { - return in.scorer(context, postings); + public Scorer scorer(LeafReaderContext context) throws IOException { + return in.scorer(context); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java index 50b04523baa3..3eb238254141 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java @@ -110,9 +110,9 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - final Weight indexWeight = indexQuery.createWeight(searcher, scoreMode, boost); - final Weight dvWeight = dvQuery.createWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + final Weight indexWeight = indexQuery.createWeight(searcher, scoreMode, minRequiredPostings, boost); + final Weight dvWeight = dvQuery.createWeight(searcher, scoreMode, minRequiredPostings, boost); return new Weight(this) { 
@Override public void extractTerms(Set terms) { @@ -133,9 +133,9 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { - final ScorerSupplier indexScorerSupplier = indexWeight.scorerSupplier(context, postings); - final ScorerSupplier dvScorerSupplier = dvWeight.scorerSupplier(context, postings); + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + final ScorerSupplier indexScorerSupplier = indexWeight.scorerSupplier(context); + final ScorerSupplier dvScorerSupplier = dvWeight.scorerSupplier(context); if (indexScorerSupplier == null || dvScorerSupplier == null) { return null; } @@ -162,8 +162,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context, postings); + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index da5ed036ddc0..ded001453857 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -686,7 +686,7 @@ protected Explanation explain(Weight weight, int doc) throws IOException { */ public Weight createNormalizedWeight(Query query, ScoreMode scoreMode) throws IOException { query = rewrite(query); - return createWeight(query, scoreMode, 1f); + return createWeight(query, scoreMode, Query.Postings.NONE, 1f); } /** @@ -694,9 +694,9 @@ public Weight createNormalizedWeight(Query query, ScoreMode scoreMode) throws IO * if possible and configured. 
* @lucene.experimental */ - public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(Query query, ScoreMode scoreMode, Query.Postings minRequiredPostings, float boost) throws IOException { final QueryCache queryCache = this.queryCache; - Weight weight = query.createWeight(this, scoreMode, boost); + Weight weight = query.createWeight(this, scoreMode, minRequiredPostings, boost); if (scoreMode.needsScores() == false && queryCache != null) { weight = queryCache.doCache(weight, queryCachingPolicy); } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index 852ac8070160..b2b930db570e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -55,7 +55,7 @@ public int innerWidth() { } @Override - public void reset() throws IOException { - in.reset(); + public boolean reset(int doc) throws IOException { + return in.reset(doc); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index a85c24011e1f..a65aa1d87c5c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -29,7 +29,7 @@ public interface IntervalIterator { int innerWidth(); - void reset() throws IOException; + boolean reset(int doc) throws IOException; int nextInterval() throws IOException; @@ -55,12 +55,12 @@ public int innerWidth() { } @Override - public void reset() { - + public boolean reset(int doc) { + return false; } @Override - public int nextInterval() throws IOException { + public int nextInterval() { return Intervals.NO_MORE_INTERVALS; } }; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java 
b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index 16de6fc9c1a8..1fe82eac5f75 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -57,10 +57,10 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { List subWeights = new ArrayList<>(); for (Query q : subQueries) { - subWeights.add(searcher.createWeight(q, ScoreMode.COMPLETE_NO_SCORES, boost)); + subWeights.add(searcher.createWeight(q, ScoreMode.COMPLETE, minRequiredPostings.atLeast(Postings.POSITIONS), boost)); } return new IntervalWeight(this, subWeights, buildSimScorer(searcher, subWeights), scoreMode); } @@ -112,7 +112,7 @@ public void extractTerms(Set terms) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, PostingsEnum.POSITIONS); + Scorer scorer = scorer(context); if (scorer != null && scorer.iterator().advance(doc) == doc) { return Explanation.match(scorer.score(), "Intervals match"); // nocommit improve this } @@ -120,13 +120,18 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { List subIntervals = new ArrayList<>(); List disis = new ArrayList<>(); for (Weight w : subWeights) { - Scorer scorer = w.scorer(context, PostingsEnum.POSITIONS); + Scorer scorer = w.scorer(context); + if (scorer == null) + return null; disis.add(scorer.iterator()); - subIntervals.add(scorer.intervals(field)); + IntervalIterator it = scorer.intervals(field); + if (it == null) + return null; + 
subIntervals.add(it); } IntervalIterator intervals = IntervalQuery.this.iteratorFunction.apply(subIntervals); LeafSimScorer leafScorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.POSITIVE_INFINITY); // nocommit diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index b25fa3e6f31e..e9cdc1aa4402 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -51,7 +51,40 @@ public float score() throws IOException { @Override public IntervalIterator intervals(String field) { if (this.field.equals(field)) - return intervals; + return new IntervalIterator() { + boolean started = false; + + @Override + public int start() { + return intervals.start(); + } + + @Override + public int end() { + return intervals.end(); + } + + @Override + public int innerWidth() { + return intervals.innerWidth(); + } + + @Override + public boolean reset(int doc) throws IOException { + // inner iterator already reset() in TwoPhaseIterator.matches() + started = false; + return true; + } + + @Override + public int nextInterval() throws IOException { + if (started == false) { + started = true; + return start(); + } + return intervals.nextInterval(); + } + }; return null; } @@ -65,8 +98,7 @@ public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { - intervals.reset(); - return intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; + return intervals.reset(approximation.docID()) && intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index d8cb9a98e71f..a001235aec0e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java 
+++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -21,7 +21,6 @@ import java.util.List; import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.util.PriorityQueue; public final class Intervals { @@ -78,8 +77,14 @@ public int innerWidth() { } @Override - public void reset() throws IOException { - upTo = pe.freq(); + public boolean reset(int doc) throws IOException { + if (pe.docID() == doc) { + upTo = pe.freq(); + pos = -1; + return true; + } + upTo = -1; + return false; } @Override @@ -134,13 +139,15 @@ public int innerWidth() { } @Override - public void reset() throws IOException { + public boolean reset(int doc) throws IOException { + boolean positioned = true; for (IntervalIterator it : subIntervals) { - it.reset(); + positioned &= it.reset(doc); } subIntervals.get(0).nextInterval(); i = 1; start = end = innerWidth = Integer.MIN_VALUE; + return positioned; } @Override @@ -171,82 +178,4 @@ public int nextInterval() throws IOException { } } - public static IntervalIterator or(List subIterators) { - return new DisjunctionIntervalIterator(subIterators); - } - - private static class DisjunctionIntervalIterator implements IntervalIterator { - - private final PriorityQueue queue; - private final IntervalIterator[] subIterators; - - IntervalIterator current; - - DisjunctionIntervalIterator(List subIterators) { - this.queue = new PriorityQueue(subIterators.size()) { - @Override - protected boolean lessThan(IntervalIterator a, IntervalIterator b) { - return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); - } - }; - this.subIterators = new IntervalIterator[subIterators.size()]; - - for (int i = 0; i < subIterators.size(); i++) { - this.subIterators[i] = subIterators.get(i); - } - } - - @Override - public int start() { - return current.start(); - } - - @Override - public int end() { - return current.end(); - } - - @Override - public int innerWidth() { - return current.innerWidth(); - } - - @Override - public void 
reset() throws IOException { - queue.clear(); - for (int i = 0; i < subIterators.length; i++) { - subIterators[i].reset(); - subIterators[i].nextInterval(); - queue.add(subIterators[i]); - } - current = null; - } - - @Override - public int nextInterval() throws IOException { - if (current == null) { - current = queue.top(); - return current.start(); - } - int start = current.start(), end = current.end(); - while (queue.size() > 0 && contains(queue.top(), start, end)) { - IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { - queue.add(it); - } - } - if (queue.size() == 0) { - current = IntervalIterator.EMPTY; - return NO_MORE_INTERVALS; - } - current = queue.top(); - return current.start(); - } - - private boolean contains(IntervalIterator it, int start, int end) { - return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); - } - - } - } diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java index 5b8cf917ba1d..beb73ad11159 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java @@ -715,31 +715,31 @@ private boolean shouldCache(LeafReaderContext context) throws IOException { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { if (used.compareAndSet(false, true)) { policy.onUse(getQuery()); } if (in.isCacheable(context) == false) { // this segment is not suitable for caching - return in.scorerSupplier(context, postings); + return in.scorerSupplier(context); } // Short-circuit: Check whether this segment is eligible for caching // before we take a lock because of #get if (shouldCache(context) == false) { - return in.scorerSupplier(context, postings); + return 
in.scorerSupplier(context); } // If the lock is already busy, prefer using the uncached version than waiting if (lock.tryLock() == false) { - return in.scorerSupplier(context, postings); + return in.scorerSupplier(context); } final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper(); if (cacheHelper == null) { // this reader has no cache helper - return in.scorerSupplier(context, postings); + return in.scorerSupplier(context); } DocIdSet docIdSet; try { @@ -749,7 +749,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) } if (docIdSet == null) { - ScorerSupplier inSupplier = in.scorerSupplier(context, postings); + ScorerSupplier inSupplier = in.scorerSupplier(context); if (inSupplier == null) { putIfAbsent(in.getQuery(), context, DocIdSet.EMPTY, cacheHelper); return null; @@ -809,8 +809,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context, postings); + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java index e878924a9f99..f11861820bd3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java @@ -29,14 +29,14 @@ public final class MatchAllDocsQuery extends Query { @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) { return new ConstantScoreWeight(this, boost) { @Override public String toString() { return "weight(" + MatchAllDocsQuery.this + ")"; } 
@Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc())); } diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java index d539c3d21453..74e8bdeaedda 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java @@ -42,7 +42,7 @@ public MatchNoDocsQuery(String reason) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new Weight(this) { @Override public void extractTerms(Set terms) { @@ -54,7 +54,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 23eb49621252..f6154bc30685 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -193,11 +193,13 @@ private class MultiPhraseWeight extends Weight { private final Similarity.SimScorer stats; private final Map termStates = new HashMap<>(); private final ScoreMode scoreMode; + private final Postings minRequiredPostings; - public MultiPhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) + public MultiPhraseWeight(IndexSearcher searcher, 
ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { super(MultiPhraseQuery.this); this.scoreMode = scoreMode; + this.minRequiredPostings = minRequiredPostings; this.similarity = searcher.getSimilarity(); final IndexReaderContext context = searcher.getTopReaderContext(); @@ -236,7 +238,7 @@ public void extractTerms(Set terms) { } @Override - public Scorer scorer(LeafReaderContext context, short pf) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { assert termArrays.length != 0; final LeafReader reader = context.reader(); @@ -265,7 +267,7 @@ public Scorer scorer(LeafReaderContext context, short pf) throws IOException { TermState termState = termStates.get(term).get(context); if (termState != null) { termsEnum.seekExact(term.bytes(), termState); - postings.add(termsEnum.postings(null, PostingsEnum.highest(pf, PostingsEnum.POSITIONS))); + postings.add(termsEnum.postings(null, minRequiredPostings.atLeast(Postings.POSITIONS).getRequiredPostings())); totalMatchCost += PhraseQuery.termPositionsCost(termsEnum); } } @@ -307,7 +309,7 @@ public boolean isCacheable(LeafReaderContext ctx) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, PostingsEnum.POSITIONS); + Scorer scorer = scorer(context); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { @@ -343,8 +345,8 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return new MultiPhraseWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return new MultiPhraseWeight(searcher, scoreMode, minRequiredPostings, boost); } /** Prints a user-readable version of this query. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java index cfad13791b1f..c2128927068d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java @@ -108,7 +108,7 @@ public final int hashCode() { public final String getField() { return query.getField(); } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { /** Try to collect terms from the given terms enum and return true iff all @@ -153,7 +153,7 @@ private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException { bq.add(new TermQuery(new Term(query.field, t.term), termStates), Occur.SHOULD); } Query q = new ConstantScoreQuery(bq.build()); - final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score()); + final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, minRequiredPostings, score()); return new WeightOrDocIdSet(weight); } @@ -203,10 +203,10 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { final WeightOrDocIdSet weightOrBitSet = rewrite(context); if (weightOrBitSet.weight != null) { - return weightOrBitSet.weight.scorer(context, postings); + return weightOrBitSet.weight.scorer(context); } else { return scorer(weightOrBitSet.set); } diff --git a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java 
b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java index 9e639247aec8..3382dda13902 100644 --- a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java @@ -62,10 +62,10 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { FieldInfos fieldInfos = context.reader().getFieldInfos(); FieldInfo fieldInfo = fieldInfos.fieldInfo(field); if (fieldInfo == null || fieldInfo.hasNorms() == false) { diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index b58f1ed868aa..af94a03a25e3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -353,9 +353,10 @@ private class PhraseWeight extends Weight { private final Similarity similarity; private final Similarity.SimScorer stats; private final ScoreMode scoreMode; + private final Postings minRequiredPostings; private transient TermStates states[]; - public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) + public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { super(PhraseQuery.this); final int[] positions = PhraseQuery.this.getPositions(); @@ -365,6 +366,7 @@ public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throw new IllegalStateException("PhraseWeight requires 
that the first position is 0, call rewrite first"); } this.scoreMode = scoreMode; + this.minRequiredPostings = minRequiredPostings; this.similarity = searcher.getSimilarity(); final IndexReaderContext context = searcher.getTopReaderContext(); states = new TermStates[terms.length]; @@ -396,7 +398,7 @@ public void extractTerms(Set queryTerms) { public String toString() { return "weight(" + PhraseQuery.this + ")"; } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { assert terms.length > 0; final LeafReader reader = context.reader(); PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.length]; @@ -422,7 +424,7 @@ public Scorer scorer(LeafReaderContext context, short postings) throws IOExcepti return null; } te.seekExact(t.bytes(), state); - PostingsEnum postingsEnum = te.postings(null, PostingsEnum.highest(postings, PostingsEnum.POSITIONS)); + PostingsEnum postingsEnum = te.postings(null, minRequiredPostings.atLeast(Postings.POSITIONS).getRequiredPostings()); postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t); totalMatchCost += termPositionsCost(te); } @@ -455,7 +457,7 @@ private boolean termNotInReader(LeafReader reader, Term term) throws IOException @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, PostingsEnum.POSITIONS); + Scorer scorer = scorer(context); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { @@ -510,8 +512,8 @@ static float termPositionsCost(TermsEnum termsEnum) throws IOException { @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return new PhraseWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return 
new PhraseWeight(searcher, scoreMode, minRequiredPostings, boost); } /** Prints a user-readable version of this query. */ diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java index 325e06060336..25095400c336 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java @@ -106,7 +106,7 @@ protected PointInSetQuery(String field, int numDims, int bytesPerDim, Stream pac } @Override - public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs". // This is an inverted structure and should be used in the first pass: @@ -114,7 +114,7 @@ public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, fl return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { LeafReader reader = context.reader(); PointValues values = reader.getPointValues(field); diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 897eb5d989ea..683f737bde57 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -99,7 +99,7 @@ public static void checkArgs(String field, Object lowerPoint, Object upperPoint) } @Override - public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public final Weight 
createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs". // This is an inverted structure and should be used in the first pass: @@ -224,7 +224,7 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { LeafReader reader = context.reader(); PointValues values = reader.getPointValues(field); @@ -314,8 +314,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context, postings); + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/Query.java b/lucene/core/src/java/org/apache/lucene/search/Query.java index 54de63fc02fd..aec1d9dc9d48 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Query.java +++ b/lucene/core/src/java/org/apache/lucene/search/Query.java @@ -20,6 +20,8 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.search.spans.SpanWeight; /** The abstract base class for queries.

    Instantiable subclasses are: @@ -43,6 +45,51 @@ */ public abstract class Query { + /** + * Enumeration defining what postings information should be retrieved from the + * index for a given query + */ + public enum Postings { + NONE { + @Override + public int getRequiredPostings() { + return PostingsEnum.NONE; + } + }, + FREQS { + @Override + public int getRequiredPostings() { + return PostingsEnum.FREQS; + } + }, + POSITIONS { + @Override + public int getRequiredPostings() { + return PostingsEnum.POSITIONS; + } + }, + PAYLOADS { + @Override + public int getRequiredPostings() { + return PostingsEnum.PAYLOADS; + } + }, + OFFSETS { + @Override + public int getRequiredPostings() { + return PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS; + } + }; + + public abstract int getRequiredPostings(); + + public Postings atLeast(Postings postings) { + if (postings.compareTo(this) > 0) + return postings; + return this; + } + } + /** Prints a query to a string, with field assumed to be the * default field and omitted. */ @@ -62,7 +109,7 @@ public final String toString() { * @param scoreMode How the produced scorers will be consumed. * @param boost The boost that is propagated by the parent queries. 
*/ - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { throw new UnsupportedOperationException("Query " + this + " does not implement createWeight"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java index 054a23dad595..e98099691b09 100644 --- a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java @@ -83,7 +83,7 @@ public int compare(ScoreDoc a, ScoreDoc b) { if (readerContext != null) { // We advanced to another segment: docBase = readerContext.docBase; - scorer = weight.scorer(readerContext, PostingsEnum.FREQS); + scorer = weight.scorer(readerContext); } if (scorer != null) { diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java index d8be205f37de..5a502024d781 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java @@ -192,7 +192,12 @@ public IntervalIterator intervals(String field) { return reqIntervals; if (reqIntervals == null) return optIntervals; - return Intervals.or(Arrays.asList(reqIntervals, optIntervals)); + return new DisjunctionIntervalIterator(Arrays.asList(reqIntervals, optIntervals)) { + @Override + protected void positionSubIntervals() throws IOException { + positionOptionalScorers(); + } + }; } @Override @@ -203,9 +208,17 @@ public int docID() { @Override public float score() throws IOException { // TODO: sum into a double and cast to float if we ever send required clauses to BS1 - int curDoc = reqScorer.docID(); + positionOptionalScorers(); float score = reqScorer.score(); + if 
(optScorer.docID() == reqScorer.docID()) { + score += optScorer.score(); + } + + return score; + } + private void positionOptionalScorers() throws IOException { + int curDoc = reqScorer.docID(); int optScorerDoc = optApproximation.docID(); if (optScorerDoc < curDoc) { optScorerDoc = optApproximation.advance(curDoc); @@ -213,11 +226,6 @@ public float score() throws IOException { optScorerDoc = optApproximation.nextDoc(); } } - if (optScorerDoc == curDoc) { - score += optScorer.score(); - } - - return score; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java index 9fe7dbbbecf5..c9f44f09c681 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java @@ -112,16 +112,16 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { if (scoreMode.needsScores()) { - return new SynonymWeight(this, searcher, boost); + return new SynonymWeight(this, searcher, minRequiredPostings, boost); } else { // if scores are not needed, let BooleanWeight deal with optimizing that case. 
BooleanQuery.Builder bq = new BooleanQuery.Builder(); for (Term term : terms) { bq.add(new TermQuery(term), BooleanClause.Occur.SHOULD); } - return searcher.rewrite(bq.build()).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); + return searcher.rewrite(bq.build()).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, boost); } } @@ -129,8 +129,9 @@ class SynonymWeight extends Weight { private final TermStates termStates[]; private final Similarity similarity; private final Similarity.SimScorer simWeight; + private final Postings minRequiredPostings; - SynonymWeight(Query query, IndexSearcher searcher, float boost) throws IOException { + SynonymWeight(Query query, IndexSearcher searcher, Postings minRequiredPostings, float boost) throws IOException { super(query); CollectionStatistics collectionStats = searcher.collectionStatistics(terms[0].field()); long docFreq = 0; @@ -151,6 +152,7 @@ class SynonymWeight extends Weight { } else { this.simWeight = null; // no terms exist at all, we won't use similarity } + this.minRequiredPostings = minRequiredPostings; } @Override @@ -162,7 +164,7 @@ public void extractTerms(Set terms) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, PostingsEnum.FREQS); + Scorer scorer = scorer(context); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { @@ -188,7 +190,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { IndexOptions indexOptions = IndexOptions.NONE; if (terms.length > 0) { FieldInfo info = context.reader() @@ -209,7 +211,7 @@ public Scorer scorer(LeafReaderContext context, short postings) throws IOExcepti long termMaxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), 
termsEnum.docFreq()); totalMaxFreq += termMaxFreq; LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, termMaxFreq); - subScorers.add(new TermScorer(this, terms[i].field(), termsEnum, ScoreMode.COMPLETE, postings, simScorer)); + subScorers.add(new TermScorer(this, terms[i].field(), termsEnum, ScoreMode.COMPLETE, minRequiredPostings, simScorer)); } } if (subScorers.isEmpty()) { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java index 941e39227ca5..e08cada8d184 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java @@ -209,7 +209,7 @@ private static class WeightOrDocIdSet { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override @@ -273,7 +273,7 @@ private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException { bq.add(new TermQuery(new Term(t.field, t.term), termStates), Occur.SHOULD); } Query q = new ConstantScoreQuery(bq.build()); - final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score()); + final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, minRequiredPostings, score()); return new WeightOrDocIdSet(weight); } else { assert builder != null; @@ -309,12 +309,12 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { final WeightOrDocIdSet weightOrBitSet = rewrite(context); if (weightOrBitSet == null) { return null; } else if 
(weightOrBitSet.weight != null) { - return weightOrBitSet.weight.scorer(context, postings); + return weightOrBitSet.weight.scorer(context); } else { return scorer(weightOrBitSet.set); } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index d6e0386a2540..e2be41a7131f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -47,14 +47,16 @@ final class TermWeight extends Weight { private final Similarity.SimScorer simScorer; private final TermStates termStates; private final ScoreMode scoreMode; + private final Postings minRequiredPostings; - public TermWeight(IndexSearcher searcher, ScoreMode scoreMode, - float boost, TermStates termStates) throws IOException { + public TermWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, + float boost, TermStates termStates) throws IOException { super(TermQuery.this); if (scoreMode.needsScores() && termStates == null) { throw new IllegalStateException("termStates are required when scores are needed"); } this.scoreMode = scoreMode; + this.minRequiredPostings = minRequiredPostings; this.termStates = termStates; this.similarity = searcher.getSimilarity(); @@ -87,7 +89,7 @@ public String toString() { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { assert termStates == null || termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);; final TermsEnum termsEnum = getTermsEnum(context); if (termsEnum == null) { @@ -99,7 +101,7 @@ public Scorer scorer(LeafReaderContext context, short postings) throws IOExcepti .getIndexOptions(); float maxFreq = getMaxFreq(indexOptions, 
termsEnum.totalTermFreq(), termsEnum.docFreq()); LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), maxFreq); - return new TermScorer(this, getTerm().field(), termsEnum, scoreMode, postings, scorer); + return new TermScorer(this, getTerm().field(), termsEnum, scoreMode, minRequiredPostings, scorer); } private long getMaxFreq(IndexOptions indexOptions, long ttf, long df) { @@ -145,7 +147,7 @@ private boolean termNotInReader(LeafReader reader, Term term) throws IOException @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - TermScorer scorer = (TermScorer) scorer(context, PostingsEnum.FREQS); + TermScorer scorer = (TermScorer) scorer(context); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { @@ -186,7 +188,7 @@ public Term getTerm() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { final IndexReaderContext context = searcher.getTopReaderContext(); final TermStates termState; if (perReaderTermState == null @@ -197,7 +199,7 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo termState = this.perReaderTermState; } - return new TermWeight(searcher, scoreMode, boost, termState); + return new TermWeight(searcher, scoreMode, minRequiredPostings, boost, termState); } /** Prints a user-readable version of this query. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 7162d481444e..89efa028475a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -45,12 +45,12 @@ final class TermScorer extends Scorer { * @param docScorer * A {@link LeafSimScorer} for the appropriate field. */ - TermScorer(Weight weight, String field, TermsEnum te, ScoreMode scoreMode, short postings, LeafSimScorer docScorer) throws IOException { + TermScorer(Weight weight, String field, TermsEnum te, ScoreMode scoreMode, Query.Postings minRequiredPostings, LeafSimScorer docScorer) throws IOException { super(weight); this.docScorer = docScorer; this.field = field; if (scoreMode == ScoreMode.TOP_SCORES) { - impactsEnum = te.impacts(docScorer.getSimScorer(), PostingsEnum.highest(postings, PostingsEnum.FREQS)); + impactsEnum = te.impacts(docScorer.getSimScorer(), minRequiredPostings.atLeast(Query.Postings.FREQS).getRequiredPostings()); postingsEnum = impactsEnum; iterator = new DocIdSetIterator() { @@ -107,7 +107,7 @@ public long cost() { } }; } else { - short pf = PostingsEnum.highest(scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE, postings); + int pf = minRequiredPostings.atLeast(scoreMode.needsScores() ? 
Query.Postings.FREQS : Query.Postings.NONE).getRequiredPostings(); postingsEnum = te.postings(null, pf); impactsEnum = new SlowImpactsEnum(postingsEnum, docScorer.getSimScorer().score(Float.MAX_VALUE, 1)); iterator = postingsEnum; diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 98788582ae60..244056313f78 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -37,16 +37,16 @@ * {@link org.apache.lucene.index.LeafReader} dependent state should reside in the {@link Scorer}. *

    * Since {@link Weight} creates {@link Scorer} instances for a given - * {@link org.apache.lucene.index.LeafReaderContext} ({@link #scorer(org.apache.lucene.index.LeafReaderContext,short)}) + * {@link org.apache.lucene.index.LeafReaderContext} ({@link #scorer(org.apache.lucene.index.LeafReaderContext)}) * callers must maintain the relationship between the searcher's top-level * {@link IndexReaderContext} and the context used to create a {@link Scorer}. *

    * A Weight is used in the following way: *

      *
    1. A Weight is constructed by a top-level query, given a - * IndexSearcher ({@link Query#createWeight(IndexSearcher, ScoreMode, float)}). + * IndexSearcher ({@link Query#createWeight(IndexSearcher, ScoreMode, org.apache.lucene.search.Query.Postings, float)}). *
    2. A Scorer is constructed by - * {@link #scorer(org.apache.lucene.index.LeafReaderContext,short)}. + * {@link #scorer(org.apache.lucene.index.LeafReaderContext)}. *
    * * @since 2.9 @@ -101,7 +101,7 @@ public final Query getQuery() { * @return a {@link Scorer} which scores documents in/out-of order. * @throws IOException if there is a low-level I/O error */ - public abstract Scorer scorer(LeafReaderContext context, short postings) throws IOException; + public abstract Scorer scorer(LeafReaderContext context) throws IOException; /** * Optional method. @@ -110,8 +110,8 @@ public final Query getQuery() { * builds a {@link ScorerSupplier} wrapper around it. * @see #scorer */ - public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { - final Scorer scorer = scorer(context, postings); + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + final Scorer scorer = scorer(context); if (scorer == null) { return null; } @@ -145,7 +145,7 @@ public long cost() { */ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { - Scorer scorer = scorer(context, PostingsEnum.NONE); + Scorer scorer = scorer(context); if (scorer == null) { // No docs match return null; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java index 4a4c4fbae993..1abea327ec17 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java @@ -90,8 +90,8 @@ public SpanQuery getMaskedQuery() { // ...this is done to be more consistent with things like SpanFirstQuery @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return maskedQuery.createWeight(searcher, scoreMode, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return maskedQuery.createWeight(searcher, scoreMode, 
minRequiredPostings, boost); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java index 9556959a3ed2..2b600ffe8c41 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java @@ -109,8 +109,8 @@ public String getField() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return query.createWeight(searcher, scoreMode, SpanBoostQuery.this.boost * boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return query.createWeight(searcher, scoreMode, minRequiredPostings, SpanBoostQuery.this.boost * boost); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java index 63662994bf14..b408b39dcb93 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java @@ -44,9 +44,9 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight bigWeight = big.createWeight(searcher, scoreMode, boost); - SpanWeight littleWeight = little.createWeight(searcher, scoreMode, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + SpanWeight bigWeight = big.createWeight(searcher, scoreMode, minRequiredPostings, boost); + SpanWeight littleWeight = little.createWeight(searcher, scoreMode, minRequiredPostings, boost); return new SpanContainingWeight(searcher, 
scoreMode.needsScores() ? getTermStates(bigWeight, littleWeight) : null, bigWeight, littleWeight, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java index 088e73092de9..fd79ad60c16a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java @@ -96,7 +96,7 @@ public String getField() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { throw new IllegalArgumentException("Rewrite first!"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index 17b9e5151304..199f951aadb8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -178,10 +178,10 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { List subWeights = new ArrayList<>(); for (SpanQuery q : clauses) { - subWeights.add(q.createWeight(searcher, scoreMode, boost)); + subWeights.add(q.createWeight(searcher, scoreMode, minRequiredPostings, boost)); } return new SpanNearWeight(subWeights, searcher, scoreMode.needsScores() ? 
getTermStates(subWeights) : null, boost); } @@ -307,7 +307,7 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new SpanGapWeight(searcher, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index 6c56df3abee6..e8c74f33763a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -98,9 +98,9 @@ public String toString(String field) { @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight includeWeight = include.createWeight(searcher, scoreMode, boost); - SpanWeight excludeWeight = exclude.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + SpanWeight includeWeight = include.createWeight(searcher, scoreMode, minRequiredPostings, boost); + SpanWeight excludeWeight = exclude.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, boost); return new SpanNotWeight(searcher, scoreMode.needsScores() ? 
getTermStates(includeWeight) : null, includeWeight, excludeWeight, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index 849edaa30e6e..fb0f0aac7dce 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -116,10 +116,10 @@ public int hashCode() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { List subWeights = new ArrayList<>(clauses.size()); for (SpanQuery q : clauses) { - subWeights.add(q.createWeight(searcher, scoreMode, boost)); + subWeights.add(q.createWeight(searcher, scoreMode, minRequiredPostings, boost)); } return new SpanOrWeight(searcher, scoreMode.needsScores() ? 
getTermStates(subWeights) : null, subWeights, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java index 099b627e1ee3..75aecc0a1fe0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java @@ -68,8 +68,8 @@ public SpanPositionCheckQuery(SpanQuery match) { protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException; @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight matchWeight = match.createWeight(searcher, scoreMode, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + SpanWeight matchWeight = match.createWeight(searcher, scoreMode, minRequiredPostings, boost); return new SpanPositionCheckWeight(matchWeight, searcher, scoreMode.needsScores() ? 
getTermStates(matchWeight) : null, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java index ca657b6cff1f..b50010fd8b85 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java @@ -37,7 +37,7 @@ public abstract class SpanQuery extends Query { public abstract String getField(); @Override - public abstract SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException; + public abstract SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException; /** * Build a map of terms to {@link TermStates}, for use in constructing SpanWeights diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index 9ac7afb81ee3..5d8ad6400fac 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -65,7 +65,7 @@ public SpanTermQuery(Term term, TermStates termStates) { public String getField() { return term.field(); } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { final TermStates context; final IndexReaderContext topContext = searcher.getTopReaderContext(); if (termStates == null || termStates.wasBuiltFor(topContext) == false) { diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index d24887f3eb8c..25b58fdc39a0 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -124,7 +124,7 @@ private Similarity.SimScorer buildSimWeight(SpanQuery query, IndexSearcher searc public abstract Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) throws IOException; @Override - public SpanScorer scorer(LeafReaderContext context, short postings) throws IOException { + public SpanScorer scorer(LeafReaderContext context) throws IOException { final Spans spans = getSpans(context, Postings.POSITIONS); if (spans == null) { return null; @@ -145,7 +145,7 @@ public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - SpanScorer scorer = scorer(context, PostingsEnum.POSITIONS); + SpanScorer scorer = scorer(context); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java index fba85fe6e86a..7f29612cc710 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java @@ -45,9 +45,9 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight bigWeight = big.createWeight(searcher, scoreMode, boost); - SpanWeight littleWeight = little.createWeight(searcher, scoreMode, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + SpanWeight bigWeight = big.createWeight(searcher, scoreMode, minRequiredPostings, boost); + SpanWeight littleWeight = little.createWeight(searcher, scoreMode, 
minRequiredPostings, boost); return new SpanWithinWeight(searcher, scoreMode.needsScores() ? getTermStates(bigWeight, littleWeight) : null, bigWeight, littleWeight, boost); } diff --git a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java index 1657f9b9ced1..f80bd5d82d63 100644 --- a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java +++ b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -189,6 +189,11 @@ public int docID() { public DocIdSetIterator iterator() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } + + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } } static final class JustCompileSimilarity extends Similarity { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java index 3118fa85394c..21b2ea3285f2 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java @@ -59,6 +59,11 @@ public DocIdSetIterator iterator() { return it; } + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } + @Override public String toString() { return "FakeScorer(cost=" + it.cost() + ")"; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java index 8a8379be3432..dab8e7923328 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java @@ -77,7 +77,7 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) 
throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new Weight(CrazyMustUseBulkScorerQuery.this) { @Override public void extractTerms(Set terms) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java index 12136b5b318a..3933b07e02c8 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java @@ -44,6 +44,11 @@ private MockScorer() { public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); } + + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } } private static class NoOpCollector extends SimpleCollector { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java index 083ac248df91..f105216baae5 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java @@ -150,6 +150,11 @@ public float score() throws IOException { public float getMaxScore(int upTo) throws IOException { return 0; } + + @Override + public IntervalIterator intervals(String field) { + return null; + } }; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java index f3382a5bf6c7..86c92f7cb937 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java @@ -135,8 +135,8 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, 
float boost) throws IOException { - return in.createWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return in.createWeight(searcher, scoreMode, minRequiredPostings, boost); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index fb3369de885a..53f589c90117 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Arrays; +import com.carrotsearch.randomizedtesting.annotations.Seed; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; @@ -39,6 +40,7 @@ import org.junit.AfterClass; import org.junit.BeforeClass; +@Seed("98A904E565FC8F70:BF2EBE6100A16015") public class TestIntervals extends LuceneTestCase { private static String field1_docs[] = { @@ -83,48 +85,55 @@ public static void teardownIndex() throws IOException { IOUtils.close(searcher.getIndexReader(), directory); } - private void checkIntervals(Query query, String field, int[][] expected) throws IOException { - Weight weight = searcher.createNormalizedWeight(query, ScoreMode.COMPLETE); + private void checkIntervals(Query query, String field, int expectedMatchCount, int[][] expected) throws IOException { + Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE, Query.Postings.POSITIONS, 1f); + int matchedDocs = 0; for (LeafReaderContext ctx : searcher.leafContexts) { - Scorer scorer = weight.scorer(ctx, PostingsEnum.POSITIONS); + Scorer scorer = weight.scorer(ctx); + if (scorer == null) + continue; assertNull(scorer.intervals(field + "1")); NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); - IntervalIterator 
intervals = scorer.intervals("field1"); + IntervalIterator intervals = scorer.intervals(field); DocIdSetIterator it = scorer.iterator(); - int matchedDocs = 0; for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { matchedDocs++; ids.advance(doc); int id = (int) ids.longValue(); - intervals.reset(); - int i = 0, pos; - while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { - assertEquals(expected[id][i], pos); - assertEquals(expected[id][i], intervals.start()); - assertEquals(expected[id][i + 1], intervals.end()); - i += 2; + System.out.println(id); + if (intervals.reset(doc)) { + int i = 0, pos; + while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { + assertEquals(expected[id][i], pos); + assertEquals(expected[id][i], intervals.start()); + assertEquals(expected[id][i + 1], intervals.end()); + i += 2; + } + assertEquals(expected[id].length, i); + } + else { + assertEquals(0, expected[id].length); } - assertEquals(expected[id].length, i); } - assertEquals(expected.length, matchedDocs); } + assertEquals(expectedMatchCount, matchedDocs); } public void testTermQueryIntervals() throws IOException { - checkIntervals(new TermQuery(new Term("field1", "porridge")), "field1", new int[][]{ + checkIntervals(new TermQuery(new Term("field1", "porridge")), "field1", 4, new int[][]{ {}, { 1, 1, 4, 4, 7, 7 }, { 1, 1, 4, 4, 7, 7 }, {}, { 1, 1, 4, 4, 7, 7 }, - { 0 } + { 0, 0 } }); } public void testOrderedNearIntervals() throws IOException { checkIntervals(IntervalQuery.orderedNearQuery("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), - "field1", new int[][]{ + "field1", 3, new int[][]{ {}, { 0, 2, 6, 17 }, { 3, 5, 6, 21 }, @@ -138,7 +147,7 @@ public void testIntervalDisjunction() throws IOException { checkIntervals(new BooleanQuery.Builder() .add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("field1", 
"hot")), BooleanClause.Occur.SHOULD) - .build(), "field1", new int[][]{ + .build(), "field1", 4, new int[][]{ {}, { 0, 0, 2, 2, 3, 3, 6, 6, 17, 17}, { 0, 0, 3, 3, 5, 5, 6, 6, 21, 21}, diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java index f6b1c7375f03..d840230ec266 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java @@ -346,7 +346,7 @@ private static class DummyQuery extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { @@ -939,7 +939,7 @@ private static class BadQuery extends Query { int[] i = new int[] {42}; // an array so that clone keeps the reference @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { @@ -1273,7 +1273,7 @@ public void testReaderNotSuitedForCaching() throws IOException { private static class NoCacheQuery extends Query { @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new Weight(this) { @Override public void extractTerms(Set terms) { @@ -1350,7 +1350,7 @@ private 
static class DummyQuery2 extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { @@ -1449,7 +1449,7 @@ public int hashCode() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, 1) { @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java index f60435c57a30..0f90b1c18a94 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java @@ -402,5 +402,10 @@ public int docID() { } }; } + + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java b/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java index 9352f72f97b7..0cb4462a49dc 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java @@ -100,8 +100,8 @@ static class AssertNeedsScores extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - final Weight w = in.createWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float 
boost) throws IOException { + final Weight w = in.createWeight(searcher, scoreMode, minRequiredPostings, boost); return new FilterWeight(w) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java index 9348862387d6..64db26e3d351 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java @@ -252,7 +252,7 @@ public void testPayloadsPos0() throws Exception { System.out.println("\ngetPayloadSpans test"); } PayloadSpanCollector collector = new PayloadSpanCollector(); - Spans pspans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS); + Spans pspans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.PAYLOADS, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS); while (pspans.nextDoc() != Spans.NO_MORE_DOCS) { while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { if (VERBOSE) { @@ -274,7 +274,7 @@ public void testPayloadsPos0() throws Exception { assertEquals(8, count); // System.out.println("\ngetSpans test"); - Spans spans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); count = 0; sawZero = false; while (spans.nextDoc() != Spans.NO_MORE_DOCS) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java index 9fbd6a46b56f..9b1460bb7d0e 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java @@ -69,6 +69,11 @@ public long cost() { } }; } + + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } } // The scores must have positive as well as negative values @@ -97,7 +102,7 @@ public void testNegativeScores() throws Exception { IndexReader ir = writer.getReader(); writer.close(); IndexSearcher searcher = newSearcher(ir); - Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, 1f); + Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, Query.Postings.NONE, 1f); Scorer s = new SimpleScorer(fake); TopDocsCollector tdc = TopScoreDocCollector.create(scores.length); Collector c = new PositiveScoresOnlyCollector(tdc); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java index d1f307d063ec..9ba43ae6b6ec 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java @@ -418,7 +418,7 @@ public FixedScoreQuery(int[] idToNum, boolean reverse) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new Weight(FixedScoreQuery.this) { @@ -437,6 +437,11 @@ public int docID() { return docID; } + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } + @Override public DocIdSetIterator iterator() { return new DocIdSetIterator() { diff --git 
a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java index 257310176740..53ecd49b6984 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java @@ -50,6 +50,11 @@ public float getMaxScore(int upTo) throws IOException { @Override public int docID() { return doc; } + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } + @Override public DocIdSetIterator iterator() { return new DocIdSetIterator() { @@ -117,7 +122,7 @@ public void testGetScores() throws Exception { IndexReader ir = writer.getReader(); writer.close(); IndexSearcher searcher = newSearcher(ir); - Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, 1f); + Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, Query.Postings.FREQS, 1f); Scorer s = new SimpleScorer(fake); ScoreCachingCollector scc = new ScoreCachingCollector(scores.length); scc.setScorer(s); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java b/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java index 59a246cb6647..22b42f8493b2 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java @@ -149,7 +149,7 @@ private static class BitSetQuery extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { 
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java b/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java index 05b016c31c35..8afaa2db8958 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java @@ -229,7 +229,7 @@ public RandomQuery(long seed, float density, List docValues) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java index 17c5f85dd898..c0f6b2401cb1 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java @@ -245,6 +245,11 @@ public float getMaxScore(int upTo) throws IOException { public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); } + + @Override + public IntervalIterator intervals(String field) { + return null; + } } public void testSetMinCompetitiveScore() throws Exception { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java index f12e9100d656..6600b6ee92e4 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java @@ -264,7 +264,12 @@ public int docID() { public DocIdSetIterator iterator() { return scorer.iterator(); } - + + @Override + public IntervalIterator intervals(String field) { + 
return scorer.intervals(field); + } + }; super.setScorer(s); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java index 670df770e67c..8f2bcf5bdceb 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java @@ -118,7 +118,7 @@ public int hashCode() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(DummyQuery.this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java index 5367dbcd3f05..0bffdc7ffb4d 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java @@ -336,8 +336,8 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return new FilterWeight(query.createWeight(searcher, scoreMode, boost)) { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return new FilterWeight(query.createWeight(searcher, scoreMode, minRequiredPostings, boost)) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { Scorer scorer = super.scorer(context); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java 
b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java index 3244c1d5ef81..92e99abb2ad7 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java @@ -94,7 +94,7 @@ public String getField() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java index f72ea664b937..74c9fee00668 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java @@ -143,7 +143,7 @@ public void testRewrite0() throws Exception { QueryUtils.checkEqual(q, qr); Set terms = new HashSet<>(); - qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).extractTerms(terms); + qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).extractTerms(terms); assertEquals(1, terms.size()); } @@ -163,7 +163,7 @@ public Query rewrite(IndexReader reader) { QueryUtils.checkUnequal(q, qr); Set terms = new HashSet<>(); - qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).extractTerms(terms); + qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).extractTerms(terms); assertEquals(2, terms.size()); } @@ -177,7 +177,7 @@ public void testRewrite2() throws Exception { QueryUtils.checkEqual(q, qr); HashSet set = new HashSet<>(); - qr.createWeight(searcher, ScoreMode.COMPLETE, 1f).extractTerms(set); + qr.createWeight(searcher, ScoreMode.COMPLETE, 
Query.Postings.NONE, 1f).extractTerms(set); assertEquals(2, set.size()); } @@ -253,7 +253,7 @@ public void testSpans0() throws Exception { SpanQuery q = new SpanOrQuery(q1, new FieldMaskingSpanQuery(q2, "gender")); check(q, new int[] { 0, 1, 2, 3, 4 }); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(span, 0,0,1); assertNext(span, 1,0,1); assertNext(span, 1,1,2); @@ -275,8 +275,8 @@ public void testSpans1() throws Exception { check(qA, new int[] { 0, 1, 2, 4 }); check(qB, new int[] { 0, 1, 2, 4 }); - Spans spanA = qA.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); - Spans spanB = qB.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spanA = qA.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spanB = qB.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); while (spanA.nextDoc() != Spans.NO_MORE_DOCS) { assertNotSame("spanB not still going", Spans.NO_MORE_DOCS, spanB.nextDoc()); @@ -301,7 +301,7 @@ public void testSpans2() throws Exception { new FieldMaskingSpanQuery(qB, "id") }, -1, false ); check(q, new int[] { 0, 1, 2, 3 }); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 
Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(span, 0,0,1); assertNext(span, 1,1,2); assertNext(span, 2,0,1); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java index 072d3818490a..d38db81b19bd 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java @@ -28,6 +28,7 @@ import org.apache.lucene.search.CheckHits; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TopDocs; @@ -122,7 +123,7 @@ public String s(int doc, int start, int end) { public void testNearSpansNext() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(span,0,0,3); assertNext(span,1,0,4); assertFinished(span); @@ -135,7 +136,7 @@ public void testNearSpansNext() throws Exception { */ public void testNearSpansAdvanceLikeNext() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(0, span.advance(0)); assertEquals(0, 
span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -147,7 +148,7 @@ public void testNearSpansAdvanceLikeNext() throws Exception { public void testNearSpansNextThenAdvance() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc()); assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -159,7 +160,7 @@ public void testNearSpansNextThenAdvance() throws Exception { public void testNearSpansNextThenAdvancePast() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc()); assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -168,13 +169,13 @@ public void testNearSpansNextThenAdvancePast() throws Exception { public void testNearSpansAdvancePast() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(Spans.NO_MORE_DOCS, span.advance(2)); } public void testNearSpansAdvanceTo0() throws Exception { SpanNearQuery q = 
makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(0, span.advance(0)); assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -182,7 +183,7 @@ public void testNearSpansAdvanceTo0() throws Exception { public void testNearSpansAdvanceTo1() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(1, span.advance(1)); assertEquals(0, span.nextStartPosition()); assertEquals(s(1,0,4), s(span)); @@ -221,7 +222,7 @@ public void testOrderedSpanIteration() throws Exception { new SpanOrQuery(new SpanTermQuery(new Term(FIELD, "w1")), new SpanTermQuery(new Term(FIELD, "w2"))), new SpanTermQuery(new Term(FIELD, "w4")) }, 10, true); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans,0,0,4); assertNext(spans,0,1,4); assertFinished(spans); @@ -231,7 +232,7 @@ public void testOrderedSpanIterationSameTerms1() throws Exception { SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ new SpanTermQuery(new Term(FIELD, "t1")), new SpanTermQuery(new Term(FIELD, "t2")) }, 1, true); - Spans spans = 
q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans,4,0,2); assertFinished(spans); } @@ -240,7 +241,7 @@ public void testOrderedSpanIterationSameTerms2() throws Exception { SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ new SpanTermQuery(new Term(FIELD, "t2")), new SpanTermQuery(new Term(FIELD, "t1")) }, 1, true); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans,4,1,4); assertNext(spans,4,2,4); assertFinished(spans); @@ -264,7 +265,7 @@ public void testGaps() throws Exception { .addGap(1) .addClause(new SpanTermQuery(new Term(FIELD, "w2"))) .build(); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 1, 0, 3); assertNext(spans, 2, 0, 3); assertFinished(spans); @@ -277,7 +278,7 @@ public void testGaps() throws Exception { .addClause(new SpanTermQuery(new Term(FIELD, "w3"))) .setSlop(1) .build(); - spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 
1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 2, 0, 5); assertNext(spans, 3, 0, 6); assertFinished(spans); @@ -289,7 +290,7 @@ public void testMultipleGaps() throws Exception { .addGap(2) .addClause(new SpanTermQuery(new Term(FIELD, "g"))) .build(); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 5, 0, 4); assertNext(spans, 5, 9, 13); assertFinished(spans); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java index ff9327526d22..6c2d28c5c0b1 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java @@ -31,6 +31,7 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -120,7 +121,7 @@ public void testNestedNearQuery() throws IOException { SpanNearQuery q7 = new SpanNearQuery(new SpanQuery[]{q1, q6}, 1, true); TermCollector collector = new TermCollector(); - Spans spans = q7.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q7.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(0, 
spans.advance(0)); spans.nextStartPosition(); checkCollectedTerms(spans, collector, new Term(FIELD, "w1"), new Term(FIELD, "w2"), new Term(FIELD, "w3")); @@ -140,7 +141,7 @@ public void testOrQuery() throws IOException { SpanOrQuery orQuery = new SpanOrQuery(q2, q3); TermCollector collector = new TermCollector(); - Spans spans = orQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = orQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(1, spans.advance(1)); spans.nextStartPosition(); @@ -170,7 +171,7 @@ public void testSpanNotQuery() throws IOException { SpanNotQuery notq = new SpanNotQuery(nq, q3); TermCollector collector = new TermCollector(); - Spans spans = notq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = notq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(2, spans.advance(2)); spans.nextStartPosition(); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java index b4cad767706d..f7b408877bd5 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java @@ -73,7 +73,7 @@ void checkHits(Query query, int[] results) throws Exception { } Spans makeSpans(SpanQuery sq) throws Exception { - return sq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + return 
sq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); } void tstEqualSpans(String mes, SpanQuery expectedQ, SpanQuery actualQ) throws Exception { diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java index 151c8ee16108..d8b9f9216a8a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java @@ -194,7 +194,7 @@ public void testSpanNearOrderedEqual15() throws Exception { public void testSpanNearOrderedOverlap() throws Exception { final SpanQuery query = spanNearOrderedQuery(field, 1, "t1", "t2", "t3"); - Spans spans = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals("first doc", 11, spans.nextDoc()); assertEquals("first start", 0, spans.nextStartPosition()); @@ -209,7 +209,7 @@ public void testSpanNearOrderedOverlap() throws Exception { public void testSpanNearUnOrdered() throws Exception { //See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 for discussion about this test SpanQuery senq = spanNearUnorderedQuery(field, 0, "u1", "u2"); - Spans spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 4, 1, 3); assertNext(spans, 5, 2, 4); assertNext(spans, 8, 2, 4); 
@@ -218,7 +218,7 @@ public void testSpanNearUnOrdered() throws Exception { assertFinished(spans); senq = spanNearUnorderedQuery(1, senq, spanTermQuery(field, "u2")); - spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 4, 0, 3); assertNext(spans, 4, 1, 3); // unordered spans can be subsets assertNext(spans, 5, 0, 4); @@ -232,7 +232,7 @@ public void testSpanNearUnOrdered() throws Exception { } private Spans orSpans(String[] terms) throws Exception { - return spanOrQuery(field, terms).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + return spanOrQuery(field, terms).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); } public void testSpanOrEmpty() throws Exception { @@ -414,7 +414,7 @@ private int spanCount(String include, int slop, String exclude, int pre, int pos SpanQuery iq = includeTerms.length == 1 ? 
spanTermQuery(field, include) : spanNearOrderedQuery(field, slop, includeTerms); SpanQuery eq = spanTermQuery(field, exclude); SpanQuery snq = spanNotQuery(iq, eq, pre, post); - Spans spans = snq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = snq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); int i = 0; if (spans != null) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java index 2a6376df1f17..4d98f89017a2 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java @@ -52,9 +52,9 @@ public AssertingIndexSearcher(Random random, IndexReaderContext context, Execut } @Override - public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(Query query, ScoreMode scoreMode, Query.Postings minRequiredPostings, float boost) throws IOException { // this adds assertions to the inner weights/scorers too - return new AssertingWeight(random, super.createWeight(query, scoreMode, boost), scoreMode); + return new AssertingWeight(random, super.createWeight(query, scoreMode, minRequiredPostings, boost), scoreMode); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java index b3d2f8116c44..e136eaa1023e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java @@ -39,9 +39,9 @@ public static Query wrap(Random 
random, Query query) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { assert boost >= 0; - return new AssertingWeight(new Random(random.nextLong()), in.createWeight(searcher, scoreMode, boost), scoreMode); + return new AssertingWeight(new Random(random.nextLong()), in.createWeight(searcher, scoreMode, minRequiredPostings, boost), scoreMode); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java index 3b9a740a448f..98e56a255875 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java @@ -73,8 +73,8 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - final Weight inWeight = query.createWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + final Weight inWeight = query.createWeight(searcher, scoreMode, minRequiredPostings, boost); if (scoreMode.needsScores() == false) { return inWeight; } @@ -196,6 +196,10 @@ public float getMaxScore(int upTo) throws IOException { return max; } + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } }; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java index b4e25a37d649..71592a8a3701 100644 --- 
a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java @@ -312,7 +312,7 @@ public void collect(int doc) throws IOException { if (scorer == null) { Weight w = s.createNormalizedWeight(q, ScoreMode.COMPLETE); LeafReaderContext context = readerContextArray.get(leafPtr); - scorer = w.scorer(context, PostingsEnum.FREQS); + scorer = w.scorer(context); iterator = scorer.iterator(); } @@ -377,7 +377,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); LeafReaderContext ctx = (LeafReaderContext)indexSearcher.getTopReaderContext(); - Scorer scorer = w.scorer(ctx, PostingsEnum.NONE); + Scorer scorer = w.scorer(ctx); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -407,7 +407,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); LeafReaderContext ctx = previousReader.getContext(); - Scorer scorer = w.scorer(ctx, PostingsEnum.NONE); + Scorer scorer = w.scorer(ctx); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -445,7 +445,7 @@ public void collect(int doc) throws IOException { long startMS = System.currentTimeMillis(); for (int i=lastDoc[0]+1; i<=doc; i++) { Weight w = s.createNormalizedWeight(q, ScoreMode.COMPLETE); - Scorer scorer = w.scorer(context.get(leafPtr), PostingsEnum.FREQS); + Scorer scorer = w.scorer(context.get(leafPtr)); Assert.assertTrue("query collected "+doc+" but advance("+i+") says no more docs!",scorer.iterator().advance(i) != DocIdSetIterator.NO_MORE_DOCS); Assert.assertEquals("query collected "+doc+" but advance("+i+") got to "+scorer.docID(),doc,scorer.docID()); float 
advanceScore = scorer.score(); @@ -478,7 +478,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false); indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); - Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext(), PostingsEnum.NONE); + Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext()); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -506,7 +506,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false); indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); - Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext(), PostingsEnum.NONE); + Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext()); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -526,7 +526,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { public static void checkBulkScorerSkipTo(Random r, Query query, IndexSearcher searcher) throws IOException { Weight weight = searcher.createNormalizedWeight(query, ScoreMode.COMPLETE); for (LeafReaderContext context : searcher.getIndexReader().leaves()) { - final Scorer scorer = weight.scorer(context, PostingsEnum.FREQS); + final Scorer scorer = weight.scorer(context); final BulkScorer bulkScorer = weight.bulkScorer(context); if (scorer == null && bulkScorer == null) { continue; diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java index a050b50401cb..8c408b17276c 100644 
--- a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java @@ -62,8 +62,8 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - final Weight weight = query.createWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + final Weight weight = query.createWeight(searcher, scoreMode, minRequiredPostings, boost); return new RandomApproximationWeight(weight, new Random(random.nextLong())); } @@ -108,6 +108,11 @@ public float score() throws IOException { return scorer.score(); } + @Override + public IntervalIterator intervals(String field) { + return scorer.intervals(field); + } + @Override public int advanceShallow(int target) throws IOException { return scorer.advanceShallow(target); diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java b/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java index dd2fd884f076..97c5c7a1338e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java @@ -49,7 +49,7 @@ protected void search(List leaves, Weight weight, Collector c // we force the use of Scorer (not BulkScorer) to make sure // that the scorer passed to LeafCollector.setScorer supports // Scorer.getChildren - Scorer scorer = weight.scorer(ctx, collector.scoreMode().needsScores() ? 
PostingsEnum.FREQS : PostingsEnum.NONE); + Scorer scorer = weight.scorer(ctx); if (scorer != null) { final DocIdSetIterator iterator = scorer.iterator(); final LeafCollector leafCollector = collector.getLeafCollector(ctx); diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java index f24a4ff8fe37..bcd9bf1563dc 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java @@ -43,8 +43,8 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight weight = in.createWeight(searcher, scoreMode, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + SpanWeight weight = in.createWeight(searcher, scoreMode, minRequiredPostings, boost); return new AssertingSpanWeight(searcher, weight); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java index 6c94dee59bf9..be04e0092092 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java @@ -68,8 +68,8 @@ public void extractTerms(Set terms) { } @Override - public SpanScorer scorer(LeafReaderContext context, short postings) throws IOException { - return in.scorer(context, postings); + public SpanScorer scorer(LeafReaderContext context) throws IOException { + return in.scorer(context); } @Override diff --git a/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java 
b/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java index 2f0f067c5136..d36f3e21c72a 100644 --- a/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java +++ b/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java @@ -72,8 +72,8 @@ public BrokenExplainTermQuery(Term t, boolean toggleExplainMatch, boolean breakE this.breakExplainScores = breakExplainScores; } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return new BrokenExplainWeight(this, super.createWeight(searcher,scoreMode, boost)); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return new BrokenExplainWeight(this, super.createWeight(searcher,scoreMode, minRequiredPostings, boost)); } } From e169ffc64a3d1ca3c506b6a86ec45861cf02609e Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 21 Feb 2018 16:34:41 +0000 Subject: [PATCH 04/83] Add unorderedNearQuery --- .../search/DisjunctionIntervalIterator.java | 8 +- .../lucene/search/IntervalFunction.java | 36 +++++++ .../apache/lucene/search/IntervalQuery.java | 4 + .../org/apache/lucene/search/Intervals.java | 95 +++++++++++++++++++ .../apache/lucene/search/TestIntervals.java | 19 +++- 5 files changed, 154 insertions(+), 8 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java index 57af8a28252b..a7df0b4d59a3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java @@ -64,10 +64,10 @@ public int innerWidth() { public boolean reset(int doc) throws IOException { positionSubIntervals(); queue.clear(); - for (int i = 0; i < subIterators.length; i++) { 
- if (subIterators[i].reset(doc)) { - subIterators[i].nextInterval(); - queue.add(subIterators[i]); + for (IntervalIterator subIterator : subIterators) { + if (subIterator.reset(doc)) { + subIterator.nextInterval(); + queue.add(subIterator); } } current = null; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 0db038febea1..f3adf6c02076 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -67,4 +67,40 @@ public int hashCode() { } } + public static class UnorderedNearFunction extends IntervalFunction { + + final int minWidth; + final int maxWidth; + + public UnorderedNearFunction(int minWidth, int maxWidth) { + this.minWidth = minWidth; + this.maxWidth = maxWidth; + } + + @Override + public IntervalIterator apply(List intervalIterators) { + return Intervals.innerWidthFilter(Intervals.unorderedIntervalIterator(intervalIterators), minWidth, maxWidth); + } + + @Override + public String toString() { + return "ONEAR[" + minWidth + "/" + maxWidth + "]"; + } + + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + UnorderedNearFunction that = (UnorderedNearFunction) o; + return minWidth == that.minWidth && + maxWidth == that.maxWidth; + } + + @Override + public int hashCode() { + return Objects.hash(minWidth, maxWidth); + } + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index 1fe82eac5f75..f25d8004eac9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -40,6 +40,10 @@ public static IntervalQuery orderedNearQuery(String field, int width, Query... 
s return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(0, width)); } + public static IntervalQuery unorderedNearQuery(String field, int width, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(0, width)); + } + protected IntervalQuery(String field, List subQueries, IntervalFunction iteratorFunction) { this.field = field; this.subQueries = subQueries; diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index a001235aec0e..d7bd588728b7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -21,6 +21,7 @@ import java.util.List; import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.util.PriorityQueue; public final class Intervals { @@ -178,4 +179,98 @@ public int nextInterval() throws IOException { } } + public static IntervalIterator unorderedIntervalIterator(List subIntervals) { + for (IntervalIterator it : subIntervals) { + if (it == IntervalIterator.EMPTY) + return IntervalIterator.EMPTY; + } + return new UnorderedIntervalIterator(subIntervals); + } + + private static class UnorderedIntervalIterator implements IntervalIterator { + + private final PriorityQueue queue; + private final IntervalIterator[] subIterators; + + int start, end, innerStart, innerEnd, queueEnd; + + UnorderedIntervalIterator(List subIterators) { + this.queue = new PriorityQueue(subIterators.size()) { + @Override + protected boolean lessThan(IntervalIterator a, IntervalIterator b) { + return a.start() < b.start() || (a.start() == b.start() && a.end() >= b.end()); + } + }; + this.subIterators = new IntervalIterator[subIterators.size()]; + + for (int i = 0; i < subIterators.size(); i++) { + this.subIterators[i] = subIterators.get(i); + } + } + + @Override + public int start() { + return 
start; + } + + @Override + public int end() { + return end; + } + + @Override + public int innerWidth() { + return innerEnd - innerStart + 1; + } + + @Override + public boolean reset(int doc) throws IOException { + this.queue.clear(); + this.queueEnd = start = end = innerEnd = innerStart = -1; + boolean positioned = true; + for (IntervalIterator subIterator : subIterators) { + positioned &= subIterator.reset(doc); + subIterator.nextInterval(); + queue.add(subIterator); + queueEnd = Math.max(queueEnd, subIterator.end()); + } + return positioned; + } + + void updateRightExtreme(IntervalIterator it) { + int itEnd = it.end(); + if (itEnd > queueEnd) { + queueEnd = itEnd; + innerEnd = it.start(); + } + } + + @Override + public int nextInterval() throws IOException { + while (this.queue.size() == subIterators.length && queue.top().start() == start) { + IntervalIterator it = queue.pop(); + if (it != null && it.nextInterval() != Intervals.NO_MORE_INTERVALS) { + queue.add(it); + updateRightExtreme(it); + } + } + if (this.queue.size() < subIterators.length) + return NO_MORE_INTERVALS; + do { + start = queue.top().start(); + innerStart = queue.top().end(); + end = queueEnd; + if (queue.top().end() == end) + return start; + IntervalIterator it = queue.pop(); + if (it != null && it.nextInterval() != Intervals.NO_MORE_INTERVALS) { + queue.add(it); + updateRightExtreme(it); + } + } while (this.queue.size() == subIterators.length && end == queueEnd); + return start; + } + + } + } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 53f589c90117..c7a73761aea9 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -40,14 +40,13 @@ import org.junit.AfterClass; import org.junit.BeforeClass; -@Seed("98A904E565FC8F70:BF2EBE6100A16015") public class TestIntervals extends LuceneTestCase { private 
static String field1_docs[] = { "Nothing of interest to anyone here", "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. Some like it hot, some like it cold, some like it in the pot nine days old", "Pease porridge cold, pease porridge hot, pease porridge in the pot nine days old. Some like it cold, some like it hot, some like it in the pot nine days old", - "Nor here, nowt hot going on in this one", + "Nor here, nowt hot going on in pease this one", "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. Some like it hot, some like it cold", "Porridge is great" }; @@ -100,7 +99,6 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i matchedDocs++; ids.advance(doc); int id = (int) ids.longValue(); - System.out.println(id); if (intervals.reset(doc)) { int i = 0, pos; while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { @@ -143,6 +141,19 @@ public void testOrderedNearIntervals() throws IOException { }); } + public void testUnorderedNearIntervals() throws IOException { + checkIntervals(IntervalQuery.unorderedNearQuery("field1", 100, + new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), + "field1", 3, new int[][]{ + {}, + { 0, 2, 2, 3, 6, 17 }, + { 3, 5, 5, 6, 6, 21 }, + { 3, 7 }, + { 0, 2, 2, 3, 6, 17 }, + {} + }); + } + public void testIntervalDisjunction() throws IOException { checkIntervals(new BooleanQuery.Builder() .add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD) @@ -151,7 +162,7 @@ public void testIntervalDisjunction() throws IOException { {}, { 0, 0, 2, 2, 3, 3, 6, 6, 17, 17}, { 0, 0, 3, 3, 5, 5, 6, 6, 21, 21}, - { 3, 3 }, + { 3, 3, 7, 7 }, { 0, 0, 2, 2, 3, 3, 6, 6, 17, 17}, {} }); From fc2d0bb65a51d58334d031ed2858245bc3a609b1 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 21 Feb 2018 18:08:50 +0000 Subject: [PATCH 05/83] test for more complex queries --- 
.../org/apache/lucene/search/TestIntervals.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index c7a73761aea9..330fbb76dfb8 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -167,4 +167,21 @@ public void testIntervalDisjunction() throws IOException { {} }); } + + public void testNesting() throws IOException { + checkIntervals(IntervalQuery.unorderedNearQuery("field1", 100, + new TermQuery(new Term("field1", "pease")), + new TermQuery(new Term("field1", "porridge")), + new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "hot")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("field1", "cold")), BooleanClause.Occur.SHOULD) + .build()), "field1", 3, new int[][]{ + {}, + { 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 17 }, + { 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 17 }, + {}, + { 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 17 }, + {} + }); + } } From adc63477ac535b86069572a7561c97596d084bb6 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 22 Feb 2018 10:13:52 +0000 Subject: [PATCH 06/83] Use ScoreMode to pass postings flags, add scoring to IntervalQuery --- .../lucene/document/RangeFieldQuery.java | 2 +- .../SortedNumericDocValuesRangeQuery.java | 2 +- .../SortedSetDocValuesRangeQuery.java | 2 +- .../apache/lucene/search/BooleanQuery.java | 4 +- .../apache/lucene/search/BooleanWeight.java | 4 +- .../org/apache/lucene/search/BoostQuery.java | 4 +- .../lucene/search/ConstantScoreQuery.java | 4 +- .../lucene/search/DisjunctionMaxQuery.java | 8 ++-- .../search/DocValuesFieldExistsQuery.java | 2 +- .../lucene/search/DocValuesRewriteMethod.java | 2 +- .../lucene/search/DoubleValuesSource.java | 4 +- .../lucene/search/IndexOrDocValuesQuery.java | 6 +-- .../apache/lucene/search/IndexSearcher.java | 
8 ++-- .../apache/lucene/search/IntervalQuery.java | 32 ++++++++----- .../lucene/search/MatchAllDocsQuery.java | 2 +- .../lucene/search/MatchNoDocsQuery.java | 2 +- .../lucene/search/MultiPhraseQuery.java | 12 ++--- .../MultiTermQueryConstantScoreWrapper.java | 4 +- .../lucene/search/NormsFieldExistsQuery.java | 2 +- .../org/apache/lucene/search/PhraseQuery.java | 12 ++--- .../apache/lucene/search/PointInSetQuery.java | 2 +- .../apache/lucene/search/PointRangeQuery.java | 2 +- .../java/org/apache/lucene/search/Query.java | 47 +------------------ .../org/apache/lucene/search/ScoreMode.java | 41 ++++++++++++++++ .../apache/lucene/search/SynonymQuery.java | 12 ++--- .../apache/lucene/search/TermInSetQuery.java | 4 +- .../org/apache/lucene/search/TermQuery.java | 10 ++-- .../org/apache/lucene/search/TermScorer.java | 7 ++- .../search/spans/FieldMaskingSpanQuery.java | 4 +- .../lucene/search/spans/SpanBoostQuery.java | 4 +- .../search/spans/SpanContainingQuery.java | 6 +-- .../spans/SpanMultiTermQueryWrapper.java | 2 +- .../lucene/search/spans/SpanNearQuery.java | 6 +-- .../lucene/search/spans/SpanNotQuery.java | 6 +-- .../lucene/search/spans/SpanOrQuery.java | 4 +- .../search/spans/SpanPositionCheckQuery.java | 4 +- .../apache/lucene/search/spans/SpanQuery.java | 2 +- .../lucene/search/spans/SpanTermQuery.java | 2 +- .../lucene/search/spans/SpanWithinQuery.java | 6 +-- .../lucene/search/TestBooleanScorer.java | 2 +- .../lucene/search/TestConstantScoreQuery.java | 4 +- .../apache/lucene/search/TestIntervals.java | 2 +- .../lucene/search/TestLRUQueryCache.java | 10 ++-- .../apache/lucene/search/TestNeedsScores.java | 4 +- .../lucene/search/TestPositionIncrement.java | 4 +- .../TestPositiveScoresOnlyCollector.java | 2 +- .../lucene/search/TestQueryRescorer.java | 2 +- .../TestScoreCachingWrappingScorer.java | 2 +- .../apache/lucene/search/TestScorerPerf.java | 2 +- .../apache/lucene/search/TestSortRandom.java | 2 +- .../TestUsageTrackingFilterCachingPolicy.java | 2 +- 
.../apache/lucene/search/TestWANDScorer.java | 4 +- .../search/spans/JustCompileSearchSpans.java | 2 +- .../spans/TestFieldMaskingSpanQuery.java | 14 +++--- .../search/spans/TestNearSpansOrdered.java | 26 +++++----- .../search/spans/TestSpanCollection.java | 6 +-- .../search/spans/TestSpanContainQuery.java | 2 +- .../apache/lucene/search/spans/TestSpans.java | 10 ++-- .../lucene/search/AssertingIndexSearcher.java | 4 +- .../apache/lucene/search/AssertingQuery.java | 4 +- .../lucene/search/BlockScoreQueryWrapper.java | 4 +- .../search/RandomApproximationQuery.java | 4 +- .../search/spans/AssertingSpanQuery.java | 4 +- .../search/TestBaseExplanationTestCase.java | 4 +- 64 files changed, 208 insertions(+), 207 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java index 9b32a1506734..a24b7cdfae58 100644 --- a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java @@ -262,7 +262,7 @@ private void checkFieldInfo(FieldInfo fieldInfo) { } @Override - public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) { diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java index 03a7fa897339..246b50f3dab6 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java @@ -93,7 +93,7 @@ public Query rewrite(IndexReader 
reader) throws IOException { abstract SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException; @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java index 69f542aa2e48..de7c11b1cc9a 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java @@ -104,7 +104,7 @@ public Query rewrite(IndexReader reader) throws IOException { abstract SortedSetDocValues getValues(LeafReader reader, String field) throws IOException; @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index f974dc04559b..f52df9fb9cd8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -199,12 +199,12 @@ private BooleanQuery rewriteNoScoring() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, 
float boost) throws IOException { BooleanQuery query = this; if (scoreMode.needsScores() == false) { query = rewriteNoScoring(); } - return new BooleanWeight(query, searcher, scoreMode, minRequiredPostings, boost); + return new BooleanWeight(query, searcher, scoreMode, boost); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java index 8de9394e2142..829d72a5ff19 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java @@ -45,14 +45,14 @@ final class BooleanWeight extends Weight { final ArrayList weights; final ScoreMode scoreMode; - BooleanWeight(BooleanQuery query, IndexSearcher searcher, ScoreMode scoreMode, Query.Postings minRequiredPostings, float boost) throws IOException { + BooleanWeight(BooleanQuery query, IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { super(query); this.query = query; this.scoreMode = scoreMode; this.similarity = searcher.getSimilarity(); weights = new ArrayList<>(); for (BooleanClause c : query) { - Weight w = searcher.createWeight(c.getQuery(), c.isScoring() ? scoreMode : ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, boost); + Weight w = searcher.createWeight(c.getQuery(), c.isScoring() ? 
scoreMode : ScoreMode.COMPLETE_NO_SCORES, boost); weights.add(w); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java b/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java index 860368240f77..4e4649cb7100 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java @@ -116,8 +116,8 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return query.createWeight(searcher, scoreMode, minRequiredPostings, BoostQuery.this.boost * boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return query.createWeight(searcher, scoreMode, BoostQuery.this.boost * boost); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java index 269328720540..464cde6a45f9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -110,8 +110,8 @@ public long cost() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - final Weight innerWeight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, 1f); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + final Weight innerWeight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 1f); if (scoreMode.needsScores()) { return new ConstantScoreWeight(this, boost) { diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index 
f79d2b9cfcda..1e67cb150465 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -103,10 +103,10 @@ protected class DisjunctionMaxWeight extends Weight { private final ScoreMode scoreMode; /** Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */ - public DisjunctionMaxWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public DisjunctionMaxWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { super(DisjunctionMaxQuery.this); for (Query disjunctQuery : disjuncts) { - weights.add(searcher.createWeight(disjunctQuery, scoreMode, minRequiredPostings, boost)); + weights.add(searcher.createWeight(disjunctQuery, scoreMode, boost)); } this.scoreMode = scoreMode; } @@ -189,8 +189,8 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio /** Create the Weight used to score us */ @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return new DisjunctionMaxWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new DisjunctionMaxWeight(searcher, scoreMode, boost); } /** Optimize our representation and our subqueries representations diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java index 23fbaecbd981..009f11cf116f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java @@ -62,7 +62,7 @@ public String toString(String field) { } @Override - public Weight 
createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java index 6f4408599e5c..5d591983fab0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java @@ -72,7 +72,7 @@ public final int hashCode() { public final String getField() { return query.getField(); } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java index 48579ccbee43..3c52172019dd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java @@ -82,7 +82,7 @@ public Explanation explain(LeafReaderContext ctx, int docId, Explanation scoreEx * IndexReader-independent implementations can just return {@code this} * * Queries that use DoubleValuesSource objects should call rewrite() during - * {@link Query#createWeight(IndexSearcher, ScoreMode, org.apache.lucene.search.Query.Postings, float)} rather than during + * {@link Query#createWeight(IndexSearcher, ScoreMode, float)} rather 
than during * {@link Query#rewrite(IndexReader)} to avoid IndexReader reference leakage */ public abstract DoubleValuesSource rewrite(IndexSearcher reader) throws IOException; @@ -554,7 +554,7 @@ public boolean needsScores() { @Override public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException { - return new WeightDoubleValuesSource(searcher.rewrite(query).createWeight(searcher, ScoreMode.COMPLETE, Query.Postings.NONE, 1f)); + return new WeightDoubleValuesSource(searcher.rewrite(query).createWeight(searcher, ScoreMode.COMPLETE, 1f)); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java index 3eb238254141..f89924d16054 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java @@ -110,9 +110,9 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - final Weight indexWeight = indexQuery.createWeight(searcher, scoreMode, minRequiredPostings, boost); - final Weight dvWeight = dvQuery.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + final Weight indexWeight = indexQuery.createWeight(searcher, scoreMode, boost); + final Weight dvWeight = dvQuery.createWeight(searcher, scoreMode, boost); return new Weight(this) { @Override public void extractTerms(Set terms) { diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index ded001453857..c23d3da347d8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -686,7 +686,7 @@ protected Explanation explain(Weight weight, int doc) throws IOException { */ public Weight createNormalizedWeight(Query query, ScoreMode scoreMode) throws IOException { query = rewrite(query); - return createWeight(query, scoreMode, Query.Postings.NONE, 1f); + return createWeight(query, scoreMode, 1f); } /** @@ -694,10 +694,10 @@ public Weight createNormalizedWeight(Query query, ScoreMode scoreMode) throws IO * if possible and configured. * @lucene.experimental */ - public Weight createWeight(Query query, ScoreMode scoreMode, Query.Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException { final QueryCache queryCache = this.queryCache; - Weight weight = query.createWeight(this, scoreMode, minRequiredPostings, boost); - if (scoreMode.needsScores() == false && queryCache != null) { + Weight weight = query.createWeight(this, scoreMode, boost); + if (scoreMode.useQueryCache() && queryCache != null) { weight = queryCache.doCache(weight, queryCachingPolicy); } return weight; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index f25d8004eac9..f409de96817f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashSet; import java.util.List; import java.util.Objects; import java.util.Set; @@ -28,6 +29,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.similarities.Similarity; public final class IntervalQuery extends Query { 
@@ -61,22 +63,29 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { List subWeights = new ArrayList<>(); for (Query q : subQueries) { - subWeights.add(searcher.createWeight(q, ScoreMode.COMPLETE, minRequiredPostings.atLeast(Postings.POSITIONS), boost)); + subWeights.add(searcher.createWeight(q, ScoreMode.COMPLETE_POSITIONS, boost)); } - return new IntervalWeight(this, subWeights, buildSimScorer(searcher, subWeights), scoreMode); + return new IntervalWeight(this, subWeights, scoreMode.needsScores() ? buildSimScorer(searcher, subWeights, boost) : null, scoreMode); } - private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, List subWeights) { - // nocommit - return new Similarity.SimScorer(field) { - @Override - public float score(float freq, long norm) { - return 1; + private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, List subWeights, float boost) throws IOException { + Set terms = new HashSet<>(); + for (Weight w : subWeights) { + w.extractTerms(terms); // nocommit can we do this without building TermStates twice? 
+ } + TermStatistics[] termStats = new TermStatistics[terms.size()]; + int termUpTo = 0; + for (Term term : terms) { + TermStatistics termStatistics = searcher.termStatistics(term, TermStates.build(searcher.readerContext, term, true)); + if (termStatistics != null) { + termStats[termUpTo++] = termStatistics; } - }; + } + CollectionStatistics collectionStats = searcher.collectionStatistics(field); + return searcher.getSimilarity().scorer(boost, collectionStats, termStats); } @Override @@ -138,7 +147,8 @@ public Scorer scorer(LeafReaderContext context) throws IOException { subIntervals.add(it); } IntervalIterator intervals = IntervalQuery.this.iteratorFunction.apply(subIntervals); - LeafSimScorer leafScorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.POSITIVE_INFINITY); // nocommit + LeafSimScorer leafScorer = simScorer == null ? null + : new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.POSITIVE_INFINITY); return new IntervalScorer(this, field, ConjunctionDISI.intersectIterators(disis), intervals, leafScorer); } diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java index f11861820bd3..89b299734144 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java @@ -29,7 +29,7 @@ public final class MatchAllDocsQuery extends Query { @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { return new ConstantScoreWeight(this, boost) { @Override public String toString() { diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java index 74e8bdeaedda..525a18395434 100644 
--- a/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java @@ -42,7 +42,7 @@ public MatchNoDocsQuery(String reason) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new Weight(this) { @Override public void extractTerms(Set terms) { diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index f6154bc30685..2b6bde8a1daa 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -193,13 +193,13 @@ private class MultiPhraseWeight extends Weight { private final Similarity.SimScorer stats; private final Map termStates = new HashMap<>(); private final ScoreMode scoreMode; - private final Postings minRequiredPostings; + private final int postingsFlags; - public MultiPhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) + public MultiPhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { super(MultiPhraseQuery.this); this.scoreMode = scoreMode; - this.minRequiredPostings = minRequiredPostings; + this.postingsFlags = Math.max(scoreMode.minRequiredPostings(), PostingsEnum.POSITIONS); this.similarity = searcher.getSimilarity(); final IndexReaderContext context = searcher.getTopReaderContext(); @@ -267,7 +267,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { TermState termState = termStates.get(term).get(context); if (termState != null) { termsEnum.seekExact(term.bytes(), termState); - postings.add(termsEnum.postings(null, 
minRequiredPostings.atLeast(Postings.POSITIONS).getRequiredPostings())); + postings.add(termsEnum.postings(null, this.postingsFlags)); totalMatchCost += PhraseQuery.termPositionsCost(termsEnum); } } @@ -345,8 +345,8 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return new MultiPhraseWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new MultiPhraseWeight(searcher, scoreMode, boost); } /** Prints a user-readable version of this query. */ diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java index c2128927068d..3a46b96411cf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java @@ -108,7 +108,7 @@ public final int hashCode() { public final String getField() { return query.getField(); } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { /** Try to collect terms from the given terms enum and return true iff all @@ -153,7 +153,7 @@ private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException { bq.add(new TermQuery(new Term(query.field, t.term), termStates), Occur.SHOULD); } Query q = new ConstantScoreQuery(bq.build()); - final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, minRequiredPostings, score()); + final Weight weight = 
searcher.rewrite(q).createWeight(searcher, scoreMode, score()); return new WeightOrDocIdSet(weight); } diff --git a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java index 3382dda13902..74218b40b0c3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java @@ -62,7 +62,7 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index af94a03a25e3..360b0175061e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -353,10 +353,10 @@ private class PhraseWeight extends Weight { private final Similarity similarity; private final Similarity.SimScorer stats; private final ScoreMode scoreMode; - private final Postings minRequiredPostings; + private final int postingsFlags; private transient TermStates states[]; - public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) + public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { super(PhraseQuery.this); final int[] positions = PhraseQuery.this.getPositions(); @@ -366,7 +366,7 @@ public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minReq throw new IllegalStateException("PhraseWeight requires that the first position is 0, 
call rewrite first"); } this.scoreMode = scoreMode; - this.minRequiredPostings = minRequiredPostings; + this.postingsFlags = Math.max(scoreMode.minRequiredPostings(), PostingsEnum.POSITIONS); this.similarity = searcher.getSimilarity(); final IndexReaderContext context = searcher.getTopReaderContext(); states = new TermStates[terms.length]; @@ -424,7 +424,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { return null; } te.seekExact(t.bytes(), state); - PostingsEnum postingsEnum = te.postings(null, minRequiredPostings.atLeast(Postings.POSITIONS).getRequiredPostings()); + PostingsEnum postingsEnum = te.postings(null, postingsFlags); postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t); totalMatchCost += termPositionsCost(te); } @@ -512,8 +512,8 @@ static float termPositionsCost(TermsEnum termsEnum) throws IOException { @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return new PhraseWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new PhraseWeight(searcher, scoreMode, boost); } /** Prints a user-readable version of this query. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java index 25095400c336..689d64a50d74 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java @@ -106,7 +106,7 @@ protected PointInSetQuery(String field, int numDims, int bytesPerDim, Stream pac } @Override - public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs". // This is an inverted structure and should be used in the first pass: diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 683f737bde57..7e48383b4720 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -99,7 +99,7 @@ public static void checkArgs(String field, Object lowerPoint, Object upperPoint) } @Override - public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs". 
// This is an inverted structure and should be used in the first pass: diff --git a/lucene/core/src/java/org/apache/lucene/search/Query.java b/lucene/core/src/java/org/apache/lucene/search/Query.java index aec1d9dc9d48..22631b393490 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Query.java +++ b/lucene/core/src/java/org/apache/lucene/search/Query.java @@ -45,51 +45,6 @@ */ public abstract class Query { - /** - * Enumeration defining what postings information should be retrieved from the - * index for a given Spans - */ - public enum Postings { - NONE { - @Override - public int getRequiredPostings() { - return PostingsEnum.NONE; - } - }, - FREQS { - @Override - public int getRequiredPostings() { - return PostingsEnum.FREQS; - } - }, - POSITIONS { - @Override - public int getRequiredPostings() { - return PostingsEnum.POSITIONS; - } - }, - PAYLOADS { - @Override - public int getRequiredPostings() { - return PostingsEnum.PAYLOADS; - } - }, - OFFSETS { - @Override - public int getRequiredPostings() { - return PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS; - } - }; - - public abstract int getRequiredPostings(); - - public Postings atLeast(Postings postings) { - if (postings.compareTo(this) > 0) - return postings; - return this; - } - } - /** Prints a query to a string, with field assumed to be the * default field and omitted. */ @@ -109,7 +64,7 @@ public final String toString() { * @param scoreMode How the produced scorers will be consumed. * @param boost The boost that is propagated by the parent queries. 
*/ - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { throw new UnsupportedOperationException("Query " + this + " does not implement createWeight"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java index 31a5d108fc37..2c014efc31d8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java @@ -16,6 +16,8 @@ */ package org.apache.lucene.search; +import org.apache.lucene.index.PostingsEnum; + /** * Different modes of search. */ @@ -29,6 +31,11 @@ public enum ScoreMode { public boolean needsScores() { return true; } + + @Override + public int minRequiredPostings() { + return PostingsEnum.FREQS; + } }, /** @@ -40,6 +47,28 @@ public boolean needsScores() { public boolean needsScores() { return false; } + + @Override + public int minRequiredPostings() { + return PostingsEnum.NONE; + } + }, + + COMPLETE_POSITIONS { + @Override + public boolean needsScores() { + return false; + } + + @Override + public boolean useQueryCache() { + return false; + } + + @Override + public int minRequiredPostings() { + return PostingsEnum.POSITIONS; + } }, /** @@ -51,10 +80,22 @@ public boolean needsScores() { public boolean needsScores() { return true; } + + @Override + public int minRequiredPostings() { + return PostingsEnum.FREQS; + } }; /** * Whether this {@link ScoreMode} needs to compute scores. 
*/ public abstract boolean needsScores(); + + public abstract int minRequiredPostings(); + + public boolean useQueryCache() { + return !needsScores(); + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java index c9f44f09c681..00ab66610914 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java @@ -112,16 +112,16 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { if (scoreMode.needsScores()) { - return new SynonymWeight(this, searcher, minRequiredPostings, boost); + return new SynonymWeight(this, searcher, boost); } else { // if scores are not needed, let BooleanWeight deal with optimizing that case. 
BooleanQuery.Builder bq = new BooleanQuery.Builder(); for (Term term : terms) { bq.add(new TermQuery(term), BooleanClause.Occur.SHOULD); } - return searcher.rewrite(bq.build()).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, boost); + return searcher.rewrite(bq.build()).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); } } @@ -129,9 +129,8 @@ class SynonymWeight extends Weight { private final TermStates termStates[]; private final Similarity similarity; private final Similarity.SimScorer simWeight; - private final Postings minRequiredPostings; - SynonymWeight(Query query, IndexSearcher searcher, Postings minRequiredPostings, float boost) throws IOException { + SynonymWeight(Query query, IndexSearcher searcher, float boost) throws IOException { super(query); CollectionStatistics collectionStats = searcher.collectionStatistics(terms[0].field()); long docFreq = 0; @@ -152,7 +151,6 @@ class SynonymWeight extends Weight { } else { this.simWeight = null; // no terms exist at all, we won't use similarity } - this.minRequiredPostings = minRequiredPostings; } @Override @@ -211,7 +209,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { long termMaxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); totalMaxFreq += termMaxFreq; LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, termMaxFreq); - subScorers.add(new TermScorer(this, terms[i].field(), termsEnum, ScoreMode.COMPLETE, minRequiredPostings, simScorer)); + subScorers.add(new TermScorer(this, terms[i].field(), termsEnum, ScoreMode.COMPLETE, simScorer)); } } if (subScorers.isEmpty()) { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java index e08cada8d184..a8bf5b0679c1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java @@ -209,7 +209,7 @@ private static class WeightOrDocIdSet { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override @@ -273,7 +273,7 @@ private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException { bq.add(new TermQuery(new Term(t.field, t.term), termStates), Occur.SHOULD); } Query q = new ConstantScoreQuery(bq.build()); - final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, minRequiredPostings, score()); + final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score()); return new WeightOrDocIdSet(weight); } else { assert builder != null; diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index e2be41a7131f..6ee9c0a61cbf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -47,16 +47,14 @@ final class TermWeight extends Weight { private final Similarity.SimScorer simScorer; private final TermStates termStates; private final ScoreMode scoreMode; - private final Postings minRequiredPostings; - public TermWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, + public TermWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost, TermStates termStates) throws IOException { super(TermQuery.this); if (scoreMode.needsScores() && termStates == null) { throw new IllegalStateException("termStates are required when scores are needed"); } this.scoreMode = scoreMode; - this.minRequiredPostings = minRequiredPostings; this.termStates = termStates; this.similarity = searcher.getSimilarity(); 
@@ -101,7 +99,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { .getIndexOptions(); float maxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), maxFreq); - return new TermScorer(this, getTerm().field(), termsEnum, scoreMode, minRequiredPostings, scorer); + return new TermScorer(this, getTerm().field(), termsEnum, scoreMode, scorer); } private long getMaxFreq(IndexOptions indexOptions, long ttf, long df) { @@ -188,7 +186,7 @@ public Term getTerm() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { final IndexReaderContext context = searcher.getTopReaderContext(); final TermStates termState; if (perReaderTermState == null @@ -199,7 +197,7 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings termState = this.perReaderTermState; } - return new TermWeight(searcher, scoreMode, minRequiredPostings, boost, termState); + return new TermWeight(searcher, scoreMode, boost, termState); } /** Prints a user-readable version of this query. */ diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 89efa028475a..9d2c7192fc4d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -45,12 +45,12 @@ final class TermScorer extends Scorer { * @param docScorer * A {@link LeafSimScorer} for the appropriate field. 
*/ - TermScorer(Weight weight, String field, TermsEnum te, ScoreMode scoreMode, Query.Postings minRequiredPostings, LeafSimScorer docScorer) throws IOException { + TermScorer(Weight weight, String field, TermsEnum te, ScoreMode scoreMode, LeafSimScorer docScorer) throws IOException { super(weight); this.docScorer = docScorer; this.field = field; if (scoreMode == ScoreMode.TOP_SCORES) { - impactsEnum = te.impacts(docScorer.getSimScorer(), minRequiredPostings.atLeast(Query.Postings.FREQS).getRequiredPostings()); + impactsEnum = te.impacts(docScorer.getSimScorer(), scoreMode.minRequiredPostings()); postingsEnum = impactsEnum; iterator = new DocIdSetIterator() { @@ -107,8 +107,7 @@ public long cost() { } }; } else { - int pf = minRequiredPostings.atLeast(scoreMode.needsScores() ? Query.Postings.FREQS : Query.Postings.NONE).getRequiredPostings(); - postingsEnum = te.postings(null, pf); + postingsEnum = te.postings(null, scoreMode.minRequiredPostings()); impactsEnum = new SlowImpactsEnum(postingsEnum, docScorer.getSimScorer().score(Float.MAX_VALUE, 1)); iterator = postingsEnum; } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java index 1abea327ec17..4a4c4fbae993 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java @@ -90,8 +90,8 @@ public SpanQuery getMaskedQuery() { // ...this is done to be more consistent with things like SpanFirstQuery @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return maskedQuery.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return maskedQuery.createWeight(searcher, 
scoreMode, boost); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java index 2b600ffe8c41..9556959a3ed2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java @@ -109,8 +109,8 @@ public String getField() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return query.createWeight(searcher, scoreMode, minRequiredPostings, SpanBoostQuery.this.boost * boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return query.createWeight(searcher, scoreMode, SpanBoostQuery.this.boost * boost); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java index b408b39dcb93..63662994bf14 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java @@ -44,9 +44,9 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - SpanWeight bigWeight = big.createWeight(searcher, scoreMode, minRequiredPostings, boost); - SpanWeight littleWeight = little.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + SpanWeight bigWeight = big.createWeight(searcher, scoreMode, boost); + SpanWeight littleWeight = little.createWeight(searcher, scoreMode, boost); return new SpanContainingWeight(searcher, scoreMode.needsScores() ? 
getTermStates(bigWeight, littleWeight) : null, bigWeight, littleWeight, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java index fd79ad60c16a..088e73092de9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java @@ -96,7 +96,7 @@ public String getField() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { throw new IllegalArgumentException("Rewrite first!"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index 199f951aadb8..17b9e5151304 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -178,10 +178,10 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { List subWeights = new ArrayList<>(); for (SpanQuery q : clauses) { - subWeights.add(q.createWeight(searcher, scoreMode, minRequiredPostings, boost)); + subWeights.add(q.createWeight(searcher, scoreMode, boost)); } return new SpanNearWeight(subWeights, searcher, scoreMode.needsScores() ? 
getTermStates(subWeights) : null, boost); } @@ -307,7 +307,7 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new SpanGapWeight(searcher, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index e8c74f33763a..6c56df3abee6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -98,9 +98,9 @@ public String toString(String field) { @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - SpanWeight includeWeight = include.createWeight(searcher, scoreMode, minRequiredPostings, boost); - SpanWeight excludeWeight = exclude.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + SpanWeight includeWeight = include.createWeight(searcher, scoreMode, boost); + SpanWeight excludeWeight = exclude.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); return new SpanNotWeight(searcher, scoreMode.needsScores() ? 
getTermStates(includeWeight) : null, includeWeight, excludeWeight, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index fb0f0aac7dce..849edaa30e6e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -116,10 +116,10 @@ public int hashCode() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { List subWeights = new ArrayList<>(clauses.size()); for (SpanQuery q : clauses) { - subWeights.add(q.createWeight(searcher, scoreMode, minRequiredPostings, boost)); + subWeights.add(q.createWeight(searcher, scoreMode, boost)); } return new SpanOrWeight(searcher, scoreMode.needsScores() ? 
getTermStates(subWeights) : null, subWeights, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java index 75aecc0a1fe0..099b627e1ee3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java @@ -68,8 +68,8 @@ public SpanPositionCheckQuery(SpanQuery match) { protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException; @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - SpanWeight matchWeight = match.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + SpanWeight matchWeight = match.createWeight(searcher, scoreMode, boost); return new SpanPositionCheckWeight(matchWeight, searcher, scoreMode.needsScores() ? 
getTermStates(matchWeight) : null, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java index b50010fd8b85..ca657b6cff1f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java @@ -37,7 +37,7 @@ public abstract class SpanQuery extends Query { public abstract String getField(); @Override - public abstract SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException; + public abstract SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException; /** * Build a map of terms to {@link TermStates}, for use in constructing SpanWeights diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index 5d8ad6400fac..9ac7afb81ee3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -65,7 +65,7 @@ public SpanTermQuery(Term term, TermStates termStates) { public String getField() { return term.field(); } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { final TermStates context; final IndexReaderContext topContext = searcher.getTopReaderContext(); if (termStates == null || termStates.wasBuiltFor(topContext) == false) { diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java index 7f29612cc710..fba85fe6e86a 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java @@ -45,9 +45,9 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - SpanWeight bigWeight = big.createWeight(searcher, scoreMode, minRequiredPostings, boost); - SpanWeight littleWeight = little.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + SpanWeight bigWeight = big.createWeight(searcher, scoreMode, boost); + SpanWeight littleWeight = little.createWeight(searcher, scoreMode, boost); return new SpanWithinWeight(searcher, scoreMode.needsScores() ? getTermStates(bigWeight, littleWeight) : null, bigWeight, littleWeight, boost); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java index dab8e7923328..8a8379be3432 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java @@ -77,7 +77,7 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new Weight(CrazyMustUseBulkScorerQuery.this) { @Override public void extractTerms(Set terms) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java index 86c92f7cb937..f3382a5bf6c7 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java @@ -135,8 +135,8 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return in.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return in.createWeight(searcher, scoreMode, boost); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 330fbb76dfb8..80094814cf49 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -85,7 +85,7 @@ public static void teardownIndex() throws IOException { } private void checkIntervals(Query query, String field, int expectedMatchCount, int[][] expected) throws IOException { - Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE, Query.Postings.POSITIONS, 1f); + Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE_POSITIONS, 1f); int matchedDocs = 0; for (LeafReaderContext ctx : searcher.leafContexts) { Scorer scorer = weight.scorer(ctx); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java index d840230ec266..f6b1c7375f03 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java @@ -346,7 +346,7 @@ private static class DummyQuery extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public 
Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { @@ -939,7 +939,7 @@ private static class BadQuery extends Query { int[] i = new int[] {42}; // an array so that clone keeps the reference @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { @@ -1273,7 +1273,7 @@ public void testReaderNotSuitedForCaching() throws IOException { private static class NoCacheQuery extends Query { @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new Weight(this) { @Override public void extractTerms(Set terms) { @@ -1350,7 +1350,7 @@ private static class DummyQuery2 extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { @@ -1449,7 +1449,7 @@ public int hashCode() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new 
ConstantScoreWeight(this, 1) { @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java b/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java index 0cb4462a49dc..9352f72f97b7 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java @@ -100,8 +100,8 @@ static class AssertNeedsScores extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - final Weight w = in.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + final Weight w = in.createWeight(searcher, scoreMode, boost); return new FilterWeight(w) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java index 64db26e3d351..9348862387d6 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java @@ -252,7 +252,7 @@ public void testPayloadsPos0() throws Exception { System.out.println("\ngetPayloadSpans test"); } PayloadSpanCollector collector = new PayloadSpanCollector(); - Spans pspans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.PAYLOADS, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS); + Spans pspans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS); while (pspans.nextDoc() != Spans.NO_MORE_DOCS) { while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { if (VERBOSE) { @@ -274,7 +274,7 @@ public void testPayloadsPos0() 
throws Exception { assertEquals(8, count); // System.out.println("\ngetSpans test"); - Spans spans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); count = 0; sawZero = false; while (spans.nextDoc() != Spans.NO_MORE_DOCS) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java index 9b1460bb7d0e..81855bb4dc84 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java @@ -102,7 +102,7 @@ public void testNegativeScores() throws Exception { IndexReader ir = writer.getReader(); writer.close(); IndexSearcher searcher = newSearcher(ir); - Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, Query.Postings.NONE, 1f); + Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, 1f); Scorer s = new SimpleScorer(fake); TopDocsCollector tdc = TopScoreDocCollector.create(scores.length); Collector c = new PositiveScoresOnlyCollector(tdc); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java index 9ba43ae6b6ec..eb46ab49e466 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java @@ -418,7 +418,7 @@ public FixedScoreQuery(int[] idToNum, boolean reverse) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws 
IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new Weight(FixedScoreQuery.this) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java index 53ecd49b6984..900267166894 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java @@ -122,7 +122,7 @@ public void testGetScores() throws Exception { IndexReader ir = writer.getReader(); writer.close(); IndexSearcher searcher = newSearcher(ir); - Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, Query.Postings.FREQS, 1f); + Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, 1f); Scorer s = new SimpleScorer(fake); ScoreCachingCollector scc = new ScoreCachingCollector(scores.length); scc.setScorer(s); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java b/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java index 22b42f8493b2..59a246cb6647 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java @@ -149,7 +149,7 @@ private static class BitSetQuery extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java 
b/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java index 8afaa2db8958..05b016c31c35 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java @@ -229,7 +229,7 @@ public RandomQuery(long seed, float density, List docValues) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java index 8f2bcf5bdceb..670df770e67c 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java @@ -118,7 +118,7 @@ public int hashCode() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(DummyQuery.this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java index 0bffdc7ffb4d..5367dbcd3f05 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java @@ -336,8 +336,8 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override 
- public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return new FilterWeight(query.createWeight(searcher, scoreMode, minRequiredPostings, boost)) { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new FilterWeight(query.createWeight(searcher, scoreMode, boost)) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { Scorer scorer = super.scorer(context); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java index 92e99abb2ad7..3244c1d5ef81 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java @@ -94,7 +94,7 @@ public String getField() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java index 74c9fee00668..f72ea664b937 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java @@ -143,7 +143,7 @@ public void testRewrite0() throws Exception { QueryUtils.checkEqual(q, qr); Set terms = new HashSet<>(); - qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).extractTerms(terms); + qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 
1f).extractTerms(terms); assertEquals(1, terms.size()); } @@ -163,7 +163,7 @@ public Query rewrite(IndexReader reader) { QueryUtils.checkUnequal(q, qr); Set terms = new HashSet<>(); - qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).extractTerms(terms); + qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).extractTerms(terms); assertEquals(2, terms.size()); } @@ -177,7 +177,7 @@ public void testRewrite2() throws Exception { QueryUtils.checkEqual(q, qr); HashSet set = new HashSet<>(); - qr.createWeight(searcher, ScoreMode.COMPLETE, Query.Postings.NONE, 1f).extractTerms(set); + qr.createWeight(searcher, ScoreMode.COMPLETE, 1f).extractTerms(set); assertEquals(2, set.size()); } @@ -253,7 +253,7 @@ public void testSpans0() throws Exception { SpanQuery q = new SpanOrQuery(q1, new FieldMaskingSpanQuery(q2, "gender")); check(q, new int[] { 0, 1, 2, 3, 4 }); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(span, 0,0,1); assertNext(span, 1,0,1); assertNext(span, 1,1,2); @@ -275,8 +275,8 @@ public void testSpans1() throws Exception { check(qA, new int[] { 0, 1, 2, 4 }); check(qB, new int[] { 0, 1, 2, 4 }); - Spans spanA = qA.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); - Spans spanB = qB.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spanA = qA.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spanB = 
qB.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); while (spanA.nextDoc() != Spans.NO_MORE_DOCS) { assertNotSame("spanB not still going", Spans.NO_MORE_DOCS, spanB.nextDoc()); @@ -301,7 +301,7 @@ public void testSpans2() throws Exception { new FieldMaskingSpanQuery(qB, "id") }, -1, false ); check(q, new int[] { 0, 1, 2, 3 }); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(span, 0,0,1); assertNext(span, 1,1,2); assertNext(span, 2,0,1); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java index d38db81b19bd..7cb18cd1ef73 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java @@ -123,7 +123,7 @@ public String s(int doc, int start, int end) { public void testNearSpansNext() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(span,0,0,3); assertNext(span,1,0,4); assertFinished(span); @@ -136,7 +136,7 @@ public void testNearSpansNext() throws Exception { */ public void testNearSpansAdvanceLikeNext() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, 
ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(0, span.advance(0)); assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -148,7 +148,7 @@ public void testNearSpansAdvanceLikeNext() throws Exception { public void testNearSpansNextThenAdvance() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc()); assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -160,7 +160,7 @@ public void testNearSpansNextThenAdvance() throws Exception { public void testNearSpansNextThenAdvancePast() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc()); assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -169,13 +169,13 @@ public void testNearSpansNextThenAdvancePast() throws Exception { public void testNearSpansAdvancePast() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 
1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(Spans.NO_MORE_DOCS, span.advance(2)); } public void testNearSpansAdvanceTo0() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(0, span.advance(0)); assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -183,7 +183,7 @@ public void testNearSpansAdvanceTo0() throws Exception { public void testNearSpansAdvanceTo1() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(1, span.advance(1)); assertEquals(0, span.nextStartPosition()); assertEquals(s(1,0,4), s(span)); @@ -222,7 +222,7 @@ public void testOrderedSpanIteration() throws Exception { new SpanOrQuery(new SpanTermQuery(new Term(FIELD, "w1")), new SpanTermQuery(new Term(FIELD, "w2"))), new SpanTermQuery(new Term(FIELD, "w4")) }, 10, true); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 
1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans,0,0,4); assertNext(spans,0,1,4); assertFinished(spans); @@ -232,7 +232,7 @@ public void testOrderedSpanIterationSameTerms1() throws Exception { SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ new SpanTermQuery(new Term(FIELD, "t1")), new SpanTermQuery(new Term(FIELD, "t2")) }, 1, true); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans,4,0,2); assertFinished(spans); } @@ -241,7 +241,7 @@ public void testOrderedSpanIterationSameTerms2() throws Exception { SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ new SpanTermQuery(new Term(FIELD, "t2")), new SpanTermQuery(new Term(FIELD, "t1")) }, 1, true); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans,4,1,4); assertNext(spans,4,2,4); assertFinished(spans); @@ -265,7 +265,7 @@ public void testGaps() throws Exception { .addGap(1) .addClause(new SpanTermQuery(new Term(FIELD, "w2"))) .build(); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 1, 0, 3); assertNext(spans, 2, 0, 3); 
assertFinished(spans); @@ -278,7 +278,7 @@ public void testGaps() throws Exception { .addClause(new SpanTermQuery(new Term(FIELD, "w3"))) .setSlop(1) .build(); - spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 2, 0, 5); assertNext(spans, 3, 0, 6); assertFinished(spans); @@ -290,7 +290,7 @@ public void testMultipleGaps() throws Exception { .addGap(2) .addClause(new SpanTermQuery(new Term(FIELD, "g"))) .build(); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 5, 0, 4); assertNext(spans, 5, 9, 13); assertFinished(spans); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java index 6c2d28c5c0b1..fa0bf1952d42 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java @@ -121,7 +121,7 @@ public void testNestedNearQuery() throws IOException { SpanNearQuery q7 = new SpanNearQuery(new SpanQuery[]{q1, q6}, 1, true); TermCollector collector = new TermCollector(); - Spans spans = q7.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q7.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 
1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.advance(0)); spans.nextStartPosition(); checkCollectedTerms(spans, collector, new Term(FIELD, "w1"), new Term(FIELD, "w2"), new Term(FIELD, "w3")); @@ -141,7 +141,7 @@ public void testOrQuery() throws IOException { SpanOrQuery orQuery = new SpanOrQuery(q2, q3); TermCollector collector = new TermCollector(); - Spans spans = orQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = orQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(1, spans.advance(1)); spans.nextStartPosition(); @@ -171,7 +171,7 @@ public void testSpanNotQuery() throws IOException { SpanNotQuery notq = new SpanNotQuery(nq, q3); TermCollector collector = new TermCollector(); - Spans spans = notq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = notq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(2, spans.advance(2)); spans.nextStartPosition(); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java index f7b408877bd5..b4cad767706d 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java @@ -73,7 +73,7 @@ void checkHits(Query query, int[] results) throws Exception { } Spans makeSpans(SpanQuery sq) throws Exception { - return sq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 
Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + return sq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); } void tstEqualSpans(String mes, SpanQuery expectedQ, SpanQuery actualQ) throws Exception { diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java index d8b9f9216a8a..151c8ee16108 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java @@ -194,7 +194,7 @@ public void testSpanNearOrderedEqual15() throws Exception { public void testSpanNearOrderedOverlap() throws Exception { final SpanQuery query = spanNearOrderedQuery(field, 1, "t1", "t2", "t3"); - Spans spans = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals("first doc", 11, spans.nextDoc()); assertEquals("first start", 0, spans.nextStartPosition()); @@ -209,7 +209,7 @@ public void testSpanNearOrderedOverlap() throws Exception { public void testSpanNearUnOrdered() throws Exception { //See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 for discussion about this test SpanQuery senq = spanNearUnorderedQuery(field, 0, "u1", "u2"); - Spans spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), 
SpanWeight.Postings.POSITIONS); assertNext(spans, 4, 1, 3); assertNext(spans, 5, 2, 4); assertNext(spans, 8, 2, 4); @@ -218,7 +218,7 @@ public void testSpanNearUnOrdered() throws Exception { assertFinished(spans); senq = spanNearUnorderedQuery(1, senq, spanTermQuery(field, "u2")); - spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 4, 0, 3); assertNext(spans, 4, 1, 3); // unordered spans can be subsets assertNext(spans, 5, 0, 4); @@ -232,7 +232,7 @@ public void testSpanNearUnOrdered() throws Exception { } private Spans orSpans(String[] terms) throws Exception { - return spanOrQuery(field, terms).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + return spanOrQuery(field, terms).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); } public void testSpanOrEmpty() throws Exception { @@ -414,7 +414,7 @@ private int spanCount(String include, int slop, String exclude, int pre, int pos SpanQuery iq = includeTerms.length == 1 ? 
spanTermQuery(field, include) : spanNearOrderedQuery(field, slop, includeTerms); SpanQuery eq = spanTermQuery(field, exclude); SpanQuery snq = spanNotQuery(iq, eq, pre, post); - Spans spans = snq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = snq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); int i = 0; if (spans != null) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java index 4d98f89017a2..2a6376df1f17 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java @@ -52,9 +52,9 @@ public AssertingIndexSearcher(Random random, IndexReaderContext context, Execut } @Override - public Weight createWeight(Query query, ScoreMode scoreMode, Query.Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException { // this adds assertions to the inner weights/scorers too - return new AssertingWeight(random, super.createWeight(query, scoreMode, minRequiredPostings, boost), scoreMode); + return new AssertingWeight(random, super.createWeight(query, scoreMode, boost), scoreMode); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java index e136eaa1023e..b3d2f8116c44 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java @@ -39,9 +39,9 @@ public static Query wrap(Random 
random, Query query) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { assert boost >= 0; - return new AssertingWeight(new Random(random.nextLong()), in.createWeight(searcher, scoreMode, minRequiredPostings, boost), scoreMode); + return new AssertingWeight(new Random(random.nextLong()), in.createWeight(searcher, scoreMode, boost), scoreMode); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java index 98e56a255875..4b982bb45a8d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java @@ -73,8 +73,8 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - final Weight inWeight = query.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + final Weight inWeight = query.createWeight(searcher, scoreMode, boost); if (scoreMode.needsScores() == false) { return inWeight; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java index 8c408b17276c..fcb48a8d0e84 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java @@ -62,8 +62,8 @@ public String toString(String 
field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - final Weight weight = query.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + final Weight weight = query.createWeight(searcher, scoreMode, boost); return new RandomApproximationWeight(weight, new Random(random.nextLong())); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java index bcd9bf1563dc..f24a4ff8fe37 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java @@ -43,8 +43,8 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - SpanWeight weight = in.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + SpanWeight weight = in.createWeight(searcher, scoreMode, boost); return new AssertingSpanWeight(searcher, weight); } diff --git a/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java b/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java index d36f3e21c72a..2f0f067c5136 100644 --- a/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java +++ b/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java @@ -72,8 +72,8 @@ public BrokenExplainTermQuery(Term t, boolean toggleExplainMatch, boolean breakE this.breakExplainScores = 
breakExplainScores; } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return new BrokenExplainWeight(this, super.createWeight(searcher,scoreMode, minRequiredPostings, boost)); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new BrokenExplainWeight(this, super.createWeight(searcher,scoreMode, boost)); } } From 4e7d5ba1bd74d54f8b29045f390af8257a84d574 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 22 Feb 2018 10:22:43 +0000 Subject: [PATCH 07/83] cleanup --- .../org/apache/lucene/index/PostingsEnum.java | 4 ---- .../org/apache/lucene/search/BooleanWeight.java | 1 - .../lucene/search/ConstantScoreWeight.java | 1 - .../org/apache/lucene/search/DisiWrapper.java | 16 ---------------- .../apache/lucene/search/DoubleValuesSource.java | 1 - .../org/apache/lucene/search/FilterWeight.java | 2 +- .../src/java/org/apache/lucene/search/Query.java | 2 -- .../org/apache/lucene/search/QueryRescorer.java | 1 - .../java/org/apache/lucene/search/TermQuery.java | 1 - .../search/spans/TestNearSpansOrdered.java | 1 - .../lucene/search/spans/TestSpanCollection.java | 1 - .../org/apache/lucene/search/QueryUtils.java | 1 - .../lucene/search/ScorerIndexSearcher.java | 1 - 13 files changed, 1 insertion(+), 32 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java index bb93268ff92b..fdd32a9f2fe0 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java @@ -63,10 +63,6 @@ public static boolean featureRequested(int flags, short feature) { return (flags & feature) == feature; } - public static short highest(short a, short b) { - return (short) Math.max(a, b); - } - private AttributeSource atts = null; /** Sole constructor. 
(For invocation by subclass diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java index 829d72a5ff19..fffdd09093f1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java @@ -27,7 +27,6 @@ import java.util.Set; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.similarities.Similarity; diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java index cdf4be94f3c8..671ec7103782 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java @@ -21,7 +21,6 @@ import java.util.Set; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; /** diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index b8891b7c0ce2..0a581804aaa3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -82,21 +82,5 @@ public DisiWrapper(Spans spans) { this.lastApproxMatchDoc = -2; } - public DisiWrapper(DocIdSetIterator disi) { - this.scorer = null; - this.spans = null; - this.iterator = disi; - this.cost = iterator.cost(); - this.doc = -1; - this.twoPhaseView = TwoPhaseIterator.unwrap(disi); - if (twoPhaseView != null) { - approximation = twoPhaseView.approximation(); - matchCost = twoPhaseView.matchCost(); - } - else { - approximation = iterator; - matchCost = 0f; - } - } } diff --git 
a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java index 3c52172019dd..3938d3f9ba22 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java @@ -26,7 +26,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.PostingsEnum; /** * Base class for producing {@link DoubleValues} diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java index 278ad987a225..925c9534f898 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java @@ -47,7 +47,7 @@ protected FilterWeight(Weight weight) { /** * Alternative constructor. * Use this variant only if the weight was not obtained - * via the {@link Query#createWeight(IndexSearcher, ScoreMode, Query.Postings, float)} + * via the {@link Query#createWeight(IndexSearcher, ScoreMode, float)} * method of the query object. */ protected FilterWeight(Query query, Weight weight) { diff --git a/lucene/core/src/java/org/apache/lucene/search/Query.java b/lucene/core/src/java/org/apache/lucene/search/Query.java index 22631b393490..54de63fc02fd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Query.java +++ b/lucene/core/src/java/org/apache/lucene/search/Query.java @@ -20,8 +20,6 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.search.spans.SpanWeight; /** The abstract base class for queries.

    Instantiable subclasses are: diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java index e98099691b09..6b19f295a7d8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java @@ -23,7 +23,6 @@ import java.util.List; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; /** A {@link Rescorer} that uses a provided Query to assign * scores to the first-pass hits. diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index 6ee9c0a61cbf..79dd976b7789 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -25,7 +25,6 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermStates; diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java index 7cb18cd1ef73..072d3818490a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java @@ -28,7 +28,6 @@ import org.apache.lucene.search.CheckHits; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TopDocs; diff --git 
a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java index fa0bf1952d42..ff9327526d22 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java @@ -31,7 +31,6 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java index 71592a8a3701..fa113113f81a 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java @@ -32,7 +32,6 @@ import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PointValues; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java b/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java index 97c5c7a1338e..ae699130190d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java @@ -22,7 +22,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.util.Bits; /** From 
66abdd68dd68b5e71010a12b097a7a63961317f0 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 22 Feb 2018 11:52:56 +0000 Subject: [PATCH 08/83] Test scoring + fix compared with phrase query --- .../org/apache/lucene/search/IntervalIterator.java | 2 +- .../org/apache/lucene/search/IntervalQuery.java | 2 +- .../org/apache/lucene/search/TestIntervalQuery.java | 13 +++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index a65aa1d87c5c..b203395b38a9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -34,7 +34,7 @@ public interface IntervalIterator { int nextInterval() throws IOException; default float score() { - return (float) (1.0 / (1.0 + (end() - start()))); + return (float) (1.0 / (end() - start())); } IntervalIterator EMPTY = new IntervalIterator() { diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index f409de96817f..6de2e19c5f51 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -148,7 +148,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { } IntervalIterator intervals = IntervalQuery.this.iteratorFunction.apply(subIntervals); LeafSimScorer leafScorer = simScorer == null ? 
null - : new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.POSITIVE_INFINITY); + : new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE); return new IntervalScorer(this, field, ConjunctionDISI.intersectIterators(disis), intervals, leafScorer); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 77029ffffc4b..09259cd182f1 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -71,6 +71,19 @@ private void checkHits(Query query, int[] results) throws IOException { CheckHits.checkHits(random(), query, field, searcher, results); } + public void testScoring() throws IOException { + PhraseQuery pq = new PhraseQuery.Builder().add(new Term(field, "w2")).add(new Term(field, "w3")).build(); + Query equiv = IntervalQuery.orderedNearQuery(field, 0, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))); + + TopDocs td1 = searcher.search(pq, 10); + TopDocs td2 = searcher.search(equiv, 10); + assertEquals(td1.totalHits, td2.totalHits); + for (int i = 0; i < td1.scoreDocs.length; i++) { + assertEquals(td1.scoreDocs[i].doc, td2.scoreDocs[i].doc); + assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 0f); + } + } + public void testOrderedNearQueryWidth0() throws IOException { checkHits(IntervalQuery.orderedNearQuery(field, 0, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), From 258e5e524a3b3ab760a3c5b10c1fda17b5b7056b Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 23 Feb 2018 14:06:52 +0000 Subject: [PATCH 09/83] Add some difference intervals --- .../search/IntervalDifferenceFunction.java | 133 ++++++++++++++ .../search/IntervalDifferenceQuery.java | 166 ++++++++++++++++++ .../lucene/search/IntervalFunction.java | 46 ++++- 
.../lucene/search/IntervalIterator.java | 2 +- .../apache/lucene/search/IntervalQuery.java | 55 ++++-- .../apache/lucene/search/IntervalScorer.java | 40 ++++- .../org/apache/lucene/search/Intervals.java | 61 ++++++- .../lucene/search/TestIntervalQuery.java | 31 +++- .../apache/lucene/search/TestIntervals.java | 11 +- 9 files changed, 506 insertions(+), 39 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceQuery.java diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java new file mode 100644 index 000000000000..c1094bd3ffca --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +public abstract class IntervalDifferenceFunction { + + @Override + public abstract int hashCode(); + + @Override + public abstract boolean equals(Object obj); + + @Override + public abstract String toString(); + + public abstract IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend); + + public static final IntervalDifferenceFunction NOT_CONTAINING = new SingletonFunction("NOT_CONTAINING") { + @Override + public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { + return Intervals.difference(minuend, subtrahend); + } + }; + + public static class NotWithinFunction extends IntervalDifferenceFunction { + + private final int positions; + + public NotWithinFunction(int positions) { + this.positions = positions; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NotWithinFunction that = (NotWithinFunction) o; + return positions == that.positions; + } + + @Override + public String toString() { + return "NOTWITHIN/" + positions; + } + + @Override + public int hashCode() { + return Objects.hash(positions); + } + + @Override + public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { + IntervalIterator notWithin = new IntervalIterator() { + @Override + public int start() { + int start = subtrahend.start(); + return Math.max(0, start - positions); + } + + @Override + public int end() { + int end = subtrahend.end(); + int newEnd = end + positions; + if (newEnd < 0) // check for overflow + return Integer.MAX_VALUE; + return newEnd; + } + + @Override + public int innerWidth() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean reset(int doc) throws IOException { + return subtrahend.reset(doc); + } + + @Override + public int nextInterval() throws 
IOException { + return subtrahend.nextInterval(); + } + }; + return NOT_CONTAINING.apply(minuend, notWithin); + } + } + + private static abstract class SingletonFunction extends IntervalDifferenceFunction { + + private final String name; + + protected SingletonFunction(String name) { + this.name = name; + } + + @Override + public int hashCode() { + return System.identityHashCode(this); + } + + @Override + public boolean equals(Object obj) { + return obj == this; + } + + @Override + public String toString() { + return name; + } + + } + + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceQuery.java new file mode 100644 index 000000000000..99d5274e51d5 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceQuery.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Collections; +import java.util.Objects; +import java.util.Set; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.similarities.Similarity; + +public class IntervalDifferenceQuery extends Query { + + public static IntervalDifferenceQuery notContaining(String field, Query minuend, Query subtrahend) { + return new IntervalDifferenceQuery(field, minuend, subtrahend, IntervalDifferenceFunction.NOT_CONTAINING); + } + + public static IntervalDifferenceQuery notWithin(String field, Query minuend, int positions, Query subtrahend) { + return new IntervalDifferenceQuery(field, minuend, subtrahend, new IntervalDifferenceFunction.NotWithinFunction(positions)); + } + + private final Query minuend; + private final Query subtrahend; + private final IntervalDifferenceFunction function; + private final String field; + + protected IntervalDifferenceQuery(String field, Query minuend, Query subtrahend, IntervalDifferenceFunction function) { + this.minuend = minuend; + this.subtrahend = subtrahend; + this.function = function; + this.field = field; + } + + @Override + public String toString(String field) { + return function + "(" + minuend + ", " + subtrahend + ")"; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + Weight minuendWeight = searcher.createWeight(minuend, ScoreMode.COMPLETE_POSITIONS, 1); + Weight subtrahendWeight = searcher.createWeight(subtrahend, ScoreMode.COMPLETE_POSITIONS, 1); + return new IntervalDifferenceWeight(minuendWeight, subtrahendWeight, scoreMode, + searcher.getSimilarity(), IntervalQuery.buildSimScorer(field, searcher, Collections.singletonList(minuendWeight), boost)); + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + Query rewrittenMinuend = 
minuend.rewrite(reader); + Query rewrittenSubtrahend = subtrahend.rewrite(reader); + if (rewrittenMinuend != minuend || rewrittenSubtrahend != subtrahend) { + return new IntervalDifferenceQuery(field, rewrittenMinuend, rewrittenSubtrahend, function); + } + return this; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + IntervalDifferenceQuery that = (IntervalDifferenceQuery) o; + return Objects.equals(minuend, that.minuend) && + Objects.equals(subtrahend, that.subtrahend) && + Objects.equals(function, that.function); + } + + @Override + public int hashCode() { + return Objects.hash(minuend, subtrahend, function); + } + + private class IntervalDifferenceWeight extends Weight { + + final Weight minuendWeight; + final Weight subtrahendWeight; + final ScoreMode scoreMode; + final Similarity similarity; + final Similarity.SimScorer simScorer; + + private IntervalDifferenceWeight(Weight minuendWeight, Weight subtrahendWeight, ScoreMode scoreMode, + Similarity similarity, Similarity.SimScorer simScorer) { + super(IntervalDifferenceQuery.this); + this.minuendWeight = minuendWeight; + this.subtrahendWeight = subtrahendWeight; + this.scoreMode = scoreMode; + this.similarity = similarity; + this.simScorer = simScorer; + } + + @Override + public void extractTerms(Set terms) { + this.minuendWeight.extractTerms(terms); + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + IntervalScorer scorer = (IntervalScorer) scorer(context); + if (scorer != null) { + int newDoc = scorer.iterator().advance(doc); + if (newDoc == doc) { + return scorer.explain("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "]"); + } + } + return Explanation.noMatch("no matching intervals"); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + Scorer minuendScorer = minuendWeight.scorer(context); 
+ Scorer subtrahendScorer = subtrahendWeight.scorer(context); + if (subtrahendScorer == null || minuendScorer == null) + return minuendScorer; + + IntervalIterator minuendIt = minuendScorer.intervals(field); + IntervalIterator subtrahendIt = subtrahendScorer.intervals(field); + if (subtrahendIt == IntervalIterator.EMPTY || subtrahendIt == null) + return minuendScorer; + + LeafSimScorer leafScorer = simScorer == null ? null + : new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE); + + return new IntervalScorer(this, field, minuendScorer.iterator(), function.apply(minuendIt, subtrahendIt), leafScorer){ + @Override + public TwoPhaseIterator twoPhaseIterator() { + return new TwoPhaseIterator(approximation) { + @Override + public boolean matches() throws IOException { + if (subtrahendScorer.docID() < approximation.docID()) { + subtrahendScorer.iterator().advance(approximation.docID()); + } + return intervals.reset(approximation.docID()) && intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; + } + + @Override + public float matchCost() { + return 0; + } + }; + } + }; + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return minuendWeight.isCacheable(ctx) && subtrahendWeight.isCacheable(ctx); + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index f3adf6c02076..87cca4ba8b1a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -17,11 +17,12 @@ package org.apache.lucene.search; +import java.io.IOException; +import java.util.Arrays; import java.util.List; import java.util.Objects; -import java.util.function.Function; -public abstract class IntervalFunction implements Function, IntervalIterator> { +public abstract class IntervalFunction { @Override public abstract int hashCode(); @@ -32,6 +33,15 @@ 
public abstract class IntervalFunction implements Function iterators); + + public static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { + @Override + public IntervalIterator apply(List intervalIterators) { + return Intervals.orderedIntervalIterator(intervalIterators); + } + }; + public static class OrderedNearFunction extends IntervalFunction { public OrderedNearFunction(int minWidth, int maxWidth) { @@ -67,6 +77,13 @@ public int hashCode() { } } + public static final IntervalFunction UNORDERED = new SingletonFunction("UNORDERED") { + @Override + public IntervalIterator apply(List intervalIterators) { + return Intervals.unorderedIntervalIterator(intervalIterators); + } + }; + public static class UnorderedNearFunction extends IntervalFunction { final int minWidth; @@ -103,4 +120,29 @@ public int hashCode() { } } + private static abstract class SingletonFunction extends IntervalFunction { + + private final String name; + + protected SingletonFunction(String name) { + this.name = name; + } + + @Override + public int hashCode() { + return System.identityHashCode(this); + } + + @Override + public boolean equals(Object obj) { + return obj == this; + } + + @Override + public String toString() { + return name; + } + + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index b203395b38a9..b14211e0eea1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -34,7 +34,7 @@ public interface IntervalIterator { int nextInterval() throws IOException; default float score() { - return (float) (1.0 / (end() - start())); + return (float) (1.0 / (1 + innerWidth())); } IntervalIterator EMPTY = new IntervalIterator() { diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java 
index 6de2e19c5f51..6903626257b2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -27,26 +27,45 @@ import java.util.stream.Collectors; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermStates; import org.apache.lucene.search.similarities.Similarity; public final class IntervalQuery extends Query { - private final String field; - private final List subQueries; - private final IntervalFunction iteratorFunction; - - public static IntervalQuery orderedNearQuery(String field, int width, Query... subQueries) { + public static IntervalQuery ordered(String field, int width, Query... subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(0, width)); } - public static IntervalQuery unorderedNearQuery(String field, int width, Query... subQueries) { + public static IntervalQuery ordered(String field, int minWidth, int maxWidth, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(minWidth, maxWidth)); + } + + public static IntervalQuery ordered(String field, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.ORDERED); + } + + public static IntervalQuery unordered(String field, int width, Query... subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(0, width)); } + public static IntervalQuery unordered(String field, int minWidth, int maxWidth, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(minWidth, maxWidth)); + } + + public static IntervalQuery unordered(String field, Query... 
subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.UNORDERED); + } + + private final String field; + private final List subQueries; + private final IntervalFunction iteratorFunction; + protected IntervalQuery(String field, List subQueries, IntervalFunction iteratorFunction) { + this(field, subQueries, null, iteratorFunction); + } + + protected IntervalQuery(String field, List subQueries, Query subtrahend, IntervalFunction iteratorFunction) { this.field = field; this.subQueries = subQueries; this.iteratorFunction = iteratorFunction; @@ -68,13 +87,14 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo for (Query q : subQueries) { subWeights.add(searcher.createWeight(q, ScoreMode.COMPLETE_POSITIONS, boost)); } - return new IntervalWeight(this, subWeights, scoreMode.needsScores() ? buildSimScorer(searcher, subWeights, boost) : null, scoreMode); + return new IntervalWeight(this, subWeights, scoreMode.needsScores() ? buildSimScorer(field, searcher, subWeights, boost) : null, + searcher.getSimilarity(), scoreMode); } - private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, List subWeights, float boost) throws IOException { + static Similarity.SimScorer buildSimScorer(String field, IndexSearcher searcher, List subWeights, float boost) throws IOException { Set terms = new HashSet<>(); for (Weight w : subWeights) { - w.extractTerms(terms); // nocommit can we do this without building TermStates twice? 
+ w.extractTerms(terms); } TermStatistics[] termStats = new TermStatistics[terms.size()]; int termUpTo = 0; @@ -107,12 +127,14 @@ private class IntervalWeight extends Weight { final List subWeights; final Similarity.SimScorer simScorer; + final Similarity similarity; final ScoreMode scoreMode; - public IntervalWeight(Query query, List subWeights, Similarity.SimScorer simScorer, ScoreMode scoreMode) { + public IntervalWeight(Query query, List subWeights, Similarity.SimScorer simScorer, Similarity similarity, ScoreMode scoreMode) { super(query); this.subWeights = subWeights; this.simScorer = simScorer; + this.similarity = similarity; this.scoreMode = scoreMode; } @@ -125,11 +147,14 @@ public void extractTerms(Set terms) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context); - if (scorer != null && scorer.iterator().advance(doc) == doc) { - return Explanation.match(scorer.score(), "Intervals match"); // nocommit improve this + IntervalScorer scorer = (IntervalScorer) scorer(context); + if (scorer != null) { + int newDoc = scorer.iterator().advance(doc); + if (newDoc == doc) { + return scorer.explain("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "]"); + } } - return Explanation.noMatch("No matching intervals"); + return Explanation.noMatch("no matching intervals"); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index e9cdc1aa4402..fb9476350618 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -21,12 +21,16 @@ class IntervalScorer extends Scorer { - private final IntervalIterator intervals; + protected final IntervalIterator intervals; private final String field; - private final DocIdSetIterator approximation; + protected final DocIdSetIterator 
approximation; private final LeafSimScorer simScorer; - protected IntervalScorer(Weight weight, String field, DocIdSetIterator approximation, IntervalIterator intervals, LeafSimScorer simScorer) { + private float freq = -1; + private int lastScoredDoc = -1; + + protected IntervalScorer(Weight weight, String field, DocIdSetIterator approximation, + IntervalIterator intervals, LeafSimScorer simScorer) { super(weight); this.intervals = intervals; this.approximation = approximation; @@ -41,13 +45,35 @@ public int docID() { @Override public float score() throws IOException { - float freq = 0; - do { - freq += intervals.score(); - } while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS); + ensureFreq(); return simScorer.score(docID(), freq); } + public Explanation explain(String topLevel) throws IOException { + ensureFreq(); + Explanation freqExplanation = Explanation.match(freq, "intervalFreq=" + freq); + Explanation scoreExplanation = simScorer.explain(docID(), freqExplanation); + return Explanation.match(scoreExplanation.getValue(), + topLevel + ", result of:", + scoreExplanation); + } + + public float freq() throws IOException { + ensureFreq(); + return freq; + } + + private void ensureFreq() throws IOException { + if (lastScoredDoc != docID()) { + lastScoredDoc = docID(); + freq = 0; + do { + freq += intervals.score(); + } + while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS); + } + } + @Override public IntervalIterator intervals(String field) { if (this.field.equals(field)) diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index d7bd588728b7..e9c32ee6c174 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -232,7 +232,10 @@ public boolean reset(int doc) throws IOException { positioned &= subIterator.reset(doc); subIterator.nextInterval(); queue.add(subIterator); - 
queueEnd = Math.max(queueEnd, subIterator.end()); + if (subIterator.end() > queueEnd) { + queueEnd = subIterator.end(); + innerEnd = subIterator.start(); + } } return positioned; } @@ -273,4 +276,60 @@ public int nextInterval() throws IOException { } + public static IntervalIterator difference(IntervalIterator minuend, IntervalIterator subtrahend) { + return new DifferenceIterator(minuend, subtrahend); + } + + private static class DifferenceIterator implements IntervalIterator { + + final IntervalIterator minuend; + final IntervalIterator subtrahend; + boolean subPositioned; + + private DifferenceIterator(IntervalIterator minuend, IntervalIterator subtrahend) { + this.minuend = minuend; + this.subtrahend = subtrahend; + } + + @Override + public int start() { + return minuend.start(); + } + + @Override + public int end() { + return minuend.end(); + } + + @Override + public int innerWidth() { + return minuend.innerWidth(); + } + + @Override + public boolean reset(int doc) throws IOException { + subPositioned = subtrahend.reset(doc); + if (subPositioned) + subPositioned = subtrahend.nextInterval() != NO_MORE_INTERVALS; + return minuend.reset(doc); + } + + @Override + public int nextInterval() throws IOException { + if (subPositioned == false) + return minuend.nextInterval(); + while (minuend.nextInterval() != NO_MORE_INTERVALS) { + while (subtrahend.end() < minuend.start()) { + if (subtrahend.nextInterval() == NO_MORE_INTERVALS) { + subPositioned = false; + return minuend.start(); + } + } + if (subtrahend.start() > minuend.end()) + return minuend.start(); + } + return NO_MORE_INTERVALS; + } + } + } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 09259cd182f1..5b943cd67237 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -73,7 +73,7 @@ private void 
checkHits(Query query, int[] results) throws IOException { public void testScoring() throws IOException { PhraseQuery pq = new PhraseQuery.Builder().add(new Term(field, "w2")).add(new Term(field, "w3")).build(); - Query equiv = IntervalQuery.orderedNearQuery(field, 0, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))); + Query equiv = IntervalQuery.ordered(field, 0, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))); TopDocs td1 = searcher.search(pq, 10); TopDocs td2 = searcher.search(equiv, 10); @@ -85,28 +85,28 @@ public void testScoring() throws IOException { } public void testOrderedNearQueryWidth0() throws IOException { - checkHits(IntervalQuery.orderedNearQuery(field, 0, new TermQuery(new Term(field, "w1")), + checkHits(IntervalQuery.ordered(field, 0, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new int[]{0}); } public void testOrderedNearQueryWidth1() throws IOException { - checkHits(IntervalQuery.orderedNearQuery(field, 1, new TermQuery(new Term(field, "w1")), + checkHits(IntervalQuery.ordered(field, 1, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new int[]{0, 1, 2, 5}); } public void testOrderedNearQueryWidth2() throws IOException { - checkHits(IntervalQuery.orderedNearQuery(field, 2, new TermQuery(new Term(field, "w1")), + checkHits(IntervalQuery.ordered(field, 2, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new int[]{0, 1, 2, 3, 5}); } public void testNestedOrderedNearQuery() throws IOException { // onear/1(w1, onear/2(w2, w3)) - Query q = IntervalQuery.orderedNearQuery(field, 1, + Query q = IntervalQuery.ordered(field, 1, new TermQuery(new Term(field, "w1")), - IntervalQuery.orderedNearQuery(field, 2, + IntervalQuery.ordered(field, 2, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))) ); @@ -114,4 +114,23 @@ public void testNestedOrderedNearQuery() throws IOException { checkHits(q, new 
int[]{0, 1, 2}); } + public void testUnorderedQuery() throws IOException { + Query q = IntervalQuery.unordered(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))); + checkHits(q, new int[]{0, 1, 2, 3, 5}); + } + + public void testNotContainingQuery() throws IOException { + Query q = IntervalDifferenceQuery.notContaining(field, + IntervalQuery.unordered(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), + new TermQuery(new Term(field, "w3"))); + + checkHits(q, new int[]{0, 2, 4, 5}); + } + + public void testNotWithinQuery() throws IOException { + Query q = IntervalDifferenceQuery.notWithin(field, new TermQuery(new Term(field, "w1")), 1, + new TermQuery(new Term(field, "w2"))); + checkHits(q, new int[]{ 1, 2, 3 }); + } + } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 80094814cf49..1c2be22c840a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -18,9 +18,7 @@ package org.apache.lucene.search; import java.io.IOException; -import java.util.Arrays; -import com.carrotsearch.randomizedtesting.annotations.Seed; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; @@ -31,7 +29,6 @@ import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; @@ -129,7 +126,7 @@ public void testTermQueryIntervals() throws IOException { } public void testOrderedNearIntervals() throws IOException { - checkIntervals(IntervalQuery.orderedNearQuery("field1", 100, + 
checkIntervals(IntervalQuery.ordered("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), "field1", 3, new int[][]{ {}, @@ -142,9 +139,9 @@ public void testOrderedNearIntervals() throws IOException { } public void testUnorderedNearIntervals() throws IOException { - checkIntervals(IntervalQuery.unorderedNearQuery("field1", 100, + checkIntervals(IntervalQuery.unordered("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), - "field1", 3, new int[][]{ + "field1", 4, new int[][]{ {}, { 0, 2, 2, 3, 6, 17 }, { 3, 5, 5, 6, 6, 21 }, @@ -169,7 +166,7 @@ public void testIntervalDisjunction() throws IOException { } public void testNesting() throws IOException { - checkIntervals(IntervalQuery.unorderedNearQuery("field1", 100, + checkIntervals(IntervalQuery.unordered("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "porridge")), new BooleanQuery.Builder() From 07c1f24843718f0b230eb11bb474063815692f83 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 23 Feb 2018 15:45:10 +0000 Subject: [PATCH 10/83] difference -> non_overlapping --- ...Query.java => ContainingIntervalQuery.java} | 18 +++++++++--------- .../search/IntervalDifferenceFunction.java | 7 +++---- .../org/apache/lucene/search/Intervals.java | 8 ++++---- .../lucene/search/TestIntervalQuery.java | 4 ++-- 4 files changed, 18 insertions(+), 19 deletions(-) rename lucene/core/src/java/org/apache/lucene/search/{IntervalDifferenceQuery.java => ContainingIntervalQuery.java} (90%) diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceQuery.java b/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java similarity index 90% rename from lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceQuery.java rename to lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java index 99d5274e51d5..93a3ff851228 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java @@ -27,14 +27,14 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.Similarity; -public class IntervalDifferenceQuery extends Query { +public class ContainingIntervalQuery extends Query { - public static IntervalDifferenceQuery notContaining(String field, Query minuend, Query subtrahend) { - return new IntervalDifferenceQuery(field, minuend, subtrahend, IntervalDifferenceFunction.NOT_CONTAINING); + public static ContainingIntervalQuery nonOverlapping(String field, Query minuend, Query subtrahend) { + return new ContainingIntervalQuery(field, minuend, subtrahend, IntervalDifferenceFunction.NON_OVERLAPPING); } - public static IntervalDifferenceQuery notWithin(String field, Query minuend, int positions, Query subtrahend) { - return new IntervalDifferenceQuery(field, minuend, subtrahend, new IntervalDifferenceFunction.NotWithinFunction(positions)); + public static ContainingIntervalQuery notWithin(String field, Query minuend, int positions, Query subtrahend) { + return new ContainingIntervalQuery(field, minuend, subtrahend, new IntervalDifferenceFunction.NotWithinFunction(positions)); } private final Query minuend; @@ -42,7 +42,7 @@ public static IntervalDifferenceQuery notWithin(String field, Query minuend, int private final IntervalDifferenceFunction function; private final String field; - protected IntervalDifferenceQuery(String field, Query minuend, Query subtrahend, IntervalDifferenceFunction function) { + protected ContainingIntervalQuery(String field, Query minuend, Query subtrahend, IntervalDifferenceFunction function) { this.minuend = minuend; this.subtrahend = subtrahend; this.function = function; @@ -67,7 +67,7 @@ public Query rewrite(IndexReader reader) throws IOException { Query rewrittenMinuend = minuend.rewrite(reader); Query rewrittenSubtrahend = 
subtrahend.rewrite(reader); if (rewrittenMinuend != minuend || rewrittenSubtrahend != subtrahend) { - return new IntervalDifferenceQuery(field, rewrittenMinuend, rewrittenSubtrahend, function); + return new ContainingIntervalQuery(field, rewrittenMinuend, rewrittenSubtrahend, function); } return this; } @@ -76,7 +76,7 @@ public Query rewrite(IndexReader reader) throws IOException { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - IntervalDifferenceQuery that = (IntervalDifferenceQuery) o; + ContainingIntervalQuery that = (ContainingIntervalQuery) o; return Objects.equals(minuend, that.minuend) && Objects.equals(subtrahend, that.subtrahend) && Objects.equals(function, that.function); @@ -97,7 +97,7 @@ private class IntervalDifferenceWeight extends Weight { private IntervalDifferenceWeight(Weight minuendWeight, Weight subtrahendWeight, ScoreMode scoreMode, Similarity similarity, Similarity.SimScorer simScorer) { - super(IntervalDifferenceQuery.this); + super(ContainingIntervalQuery.this); this.minuendWeight = minuendWeight; this.subtrahendWeight = subtrahendWeight; this.scoreMode = scoreMode; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java index c1094bd3ffca..656479fea911 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java @@ -18,7 +18,6 @@ package org.apache.lucene.search; import java.io.IOException; -import java.util.List; import java.util.Objects; public abstract class IntervalDifferenceFunction { @@ -34,10 +33,10 @@ public abstract class IntervalDifferenceFunction { public abstract IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend); - public static final IntervalDifferenceFunction NOT_CONTAINING = new 
SingletonFunction("NOT_CONTAINING") { + public static final IntervalDifferenceFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - return Intervals.difference(minuend, subtrahend); + return Intervals.nonOverlapping(minuend, subtrahend); } }; @@ -100,7 +99,7 @@ public int nextInterval() throws IOException { return subtrahend.nextInterval(); } }; - return NOT_CONTAINING.apply(minuend, notWithin); + return NON_OVERLAPPING.apply(minuend, notWithin); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index e9c32ee6c174..e8e16744fe07 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -276,17 +276,17 @@ public int nextInterval() throws IOException { } - public static IntervalIterator difference(IntervalIterator minuend, IntervalIterator subtrahend) { - return new DifferenceIterator(minuend, subtrahend); + public static IntervalIterator nonOverlapping(IntervalIterator minuend, IntervalIterator subtrahend) { + return new NonOverlappingIterator(minuend, subtrahend); } - private static class DifferenceIterator implements IntervalIterator { + private static class NonOverlappingIterator implements IntervalIterator { final IntervalIterator minuend; final IntervalIterator subtrahend; boolean subPositioned; - private DifferenceIterator(IntervalIterator minuend, IntervalIterator subtrahend) { + private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtrahend) { this.minuend = minuend; this.subtrahend = subtrahend; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 5b943cd67237..110b03df3864 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -120,7 +120,7 @@ public void testUnorderedQuery() throws IOException { } public void testNotContainingQuery() throws IOException { - Query q = IntervalDifferenceQuery.notContaining(field, + Query q = ContainingIntervalQuery.nonOverlapping(field, IntervalQuery.unordered(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new TermQuery(new Term(field, "w3"))); @@ -128,7 +128,7 @@ public void testNotContainingQuery() throws IOException { } public void testNotWithinQuery() throws IOException { - Query q = IntervalDifferenceQuery.notWithin(field, new TermQuery(new Term(field, "w1")), 1, + Query q = ContainingIntervalQuery.notWithin(field, new TermQuery(new Term(field, "w1")), 1, new TermQuery(new Term(field, "w2"))); checkHits(q, new int[]{ 1, 2, 3 }); } From 7038656d123fe1039604cfdd61e6172225ae078f Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 23 Feb 2018 15:58:07 +0000 Subject: [PATCH 11/83] Rearrange things a bit --- .../search/ContainingIntervalQuery.java | 8 - .../search/IntervalDifferenceFunction.java | 60 +++- .../apache/lucene/search/IntervalFilter.java | 20 ++ .../lucene/search/IntervalFunction.java | 186 ++++++++++- .../apache/lucene/search/IntervalQuery.java | 24 -- .../org/apache/lucene/search/Intervals.java | 305 +----------------- .../org/apache/lucene/search/TermScorer.java | 56 +++- .../lucene/search/TestIntervalQuery.java | 20 +- .../apache/lucene/search/TestIntervals.java | 6 +- 9 files changed, 346 insertions(+), 339 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java index 93a3ff851228..8d564942e864 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java @@ -29,14 +29,6 @@ public class ContainingIntervalQuery extends Query { - public static ContainingIntervalQuery nonOverlapping(String field, Query minuend, Query subtrahend) { - return new ContainingIntervalQuery(field, minuend, subtrahend, IntervalDifferenceFunction.NON_OVERLAPPING); - } - - public static ContainingIntervalQuery notWithin(String field, Query minuend, int positions, Query subtrahend) { - return new ContainingIntervalQuery(field, minuend, subtrahend, new IntervalDifferenceFunction.NotWithinFunction(positions)); - } - private final Query minuend; private final Query subtrahend; private final IntervalDifferenceFunction function; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java index 656479fea911..6b5d58c9fe8d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.util.Objects; +import static org.apache.lucene.search.Intervals.NO_MORE_INTERVALS; + public abstract class IntervalDifferenceFunction { @Override @@ -36,10 +38,66 @@ public abstract class IntervalDifferenceFunction { public static final IntervalDifferenceFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - return Intervals.nonOverlapping(minuend, subtrahend); + return nonOverlapping(minuend, subtrahend); } }; + public static IntervalIterator nonOverlapping(IntervalIterator minuend, IntervalIterator subtrahend) { + return new NonOverlappingIterator(minuend, subtrahend); + } + + private static class NonOverlappingIterator implements IntervalIterator { + + final IntervalIterator minuend; + final IntervalIterator 
subtrahend; + boolean subPositioned; + + private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtrahend) { + this.minuend = minuend; + this.subtrahend = subtrahend; + } + + @Override + public int start() { + return minuend.start(); + } + + @Override + public int end() { + return minuend.end(); + } + + @Override + public int innerWidth() { + return minuend.innerWidth(); + } + + @Override + public boolean reset(int doc) throws IOException { + subPositioned = subtrahend.reset(doc); + if (subPositioned) + subPositioned = subtrahend.nextInterval() != NO_MORE_INTERVALS; + return minuend.reset(doc); + } + + @Override + public int nextInterval() throws IOException { + if (subPositioned == false) + return minuend.nextInterval(); + while (minuend.nextInterval() != NO_MORE_INTERVALS) { + while (subtrahend.end() < minuend.start()) { + if (subtrahend.nextInterval() == NO_MORE_INTERVALS) { + subPositioned = false; + return minuend.start(); + } + } + if (subtrahend.start() > minuend.end()) + return minuend.start(); + } + return NO_MORE_INTERVALS; + } + } + public static class NotWithinFunction extends IntervalDifferenceFunction { private final int positions; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index b2b930db570e..4f33a85861f4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -21,6 +21,26 @@ public abstract class IntervalFilter implements IntervalIterator { + public static IntervalIterator widthFilter(IntervalIterator in, int minWidth, int maxWidth) { + return new IntervalFilter(in) { + @Override + protected boolean accept() { + int width = end() - start(); + return width >= minWidth && width <= maxWidth; + } + }; + } + + public static IntervalIterator innerWidthFilter(IntervalIterator in, int minWidth, int maxWidth) { + return new 
IntervalFilter(in) { + @Override + protected boolean accept() { + int width = innerWidth(); + return width >= minWidth && width <= maxWidth; + } + }; + } + private final IntervalIterator in; public IntervalFilter(IntervalIterator in) { diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 87cca4ba8b1a..5a94a25d0a33 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -22,6 +22,10 @@ import java.util.List; import java.util.Objects; +import org.apache.lucene.util.PriorityQueue; + +import static org.apache.lucene.search.Intervals.NO_MORE_INTERVALS; + public abstract class IntervalFunction { @Override @@ -38,7 +42,7 @@ public abstract class IntervalFunction { public static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { @Override public IntervalIterator apply(List intervalIterators) { - return Intervals.orderedIntervalIterator(intervalIterators); + return orderedIntervalIterator(intervalIterators); } }; @@ -54,7 +58,7 @@ public OrderedNearFunction(int minWidth, int maxWidth) { @Override public IntervalIterator apply(List intervalIterators) { - return Intervals.innerWidthFilter(Intervals.orderedIntervalIterator(intervalIterators), minWidth, maxWidth); + return IntervalFilter.innerWidthFilter(orderedIntervalIterator(intervalIterators), minWidth, maxWidth); } @Override @@ -77,10 +81,87 @@ public int hashCode() { } } + public static IntervalIterator orderedIntervalIterator(List subIterators) { + for (IntervalIterator it : subIterators) { + if (it == IntervalIterator.EMPTY) + return IntervalIterator.EMPTY; + } + return new OrderedIntervalIterator(subIterators); + } + + private static class OrderedIntervalIterator implements IntervalIterator { + + final List subIntervals; + + int start; + int end; + int innerWidth; + int i; + + private 
OrderedIntervalIterator(List subIntervals) { + this.subIntervals = subIntervals; + } + + @Override + public int start() { + return start; + } + + @Override + public int end() { + return end; + } + + @Override + public int innerWidth() { + return innerWidth; + } + + @Override + public boolean reset(int doc) throws IOException { + boolean positioned = true; + for (IntervalIterator it : subIntervals) { + positioned &= it.reset(doc); + } + subIntervals.get(0).nextInterval(); + i = 1; + start = end = innerWidth = Integer.MIN_VALUE; + return positioned; + } + + @Override + public int nextInterval() throws IOException { + start = end = NO_MORE_INTERVALS; + int b = Integer.MAX_VALUE; + while (true) { + while (true) { + if (subIntervals.get(i - 1).end() >= b) + return start; + if (i == subIntervals.size() || subIntervals.get(i).start() > subIntervals.get(i - 1).end()) + break; + do { + if (subIntervals.get(i).end() >= b || subIntervals.get(i).nextInterval() == NO_MORE_INTERVALS) + return start; + } + while (subIntervals.get(i).start() <= subIntervals.get(i - 1).end()); + i++; + } + start = subIntervals.get(0).start(); + end = subIntervals.get(subIntervals.size() - 1).end(); + b = subIntervals.get(subIntervals.size() - 1).start(); + innerWidth = b - subIntervals.get(0).end() - 1; + i = 1; + if (subIntervals.get(0).nextInterval() == NO_MORE_INTERVALS) + return start; + } + } + } + + public static final IntervalFunction UNORDERED = new SingletonFunction("UNORDERED") { @Override public IntervalIterator apply(List intervalIterators) { - return Intervals.unorderedIntervalIterator(intervalIterators); + return unorderedIntervalIterator(intervalIterators); } }; @@ -96,7 +177,7 @@ public UnorderedNearFunction(int minWidth, int maxWidth) { @Override public IntervalIterator apply(List intervalIterators) { - return Intervals.innerWidthFilter(Intervals.unorderedIntervalIterator(intervalIterators), minWidth, maxWidth); + return 
IntervalFilter.innerWidthFilter(unorderedIntervalIterator(intervalIterators), minWidth, maxWidth); } @Override @@ -120,6 +201,103 @@ public int hashCode() { } } + public static IntervalIterator unorderedIntervalIterator(List subIntervals) { + for (IntervalIterator it : subIntervals) { + if (it == IntervalIterator.EMPTY) + return IntervalIterator.EMPTY; + } + return new UnorderedIntervalIterator(subIntervals); + } + + private static class UnorderedIntervalIterator implements IntervalIterator { + + private final PriorityQueue queue; + private final IntervalIterator[] subIterators; + + int start, end, innerStart, innerEnd, queueEnd; + + UnorderedIntervalIterator(List subIterators) { + this.queue = new PriorityQueue(subIterators.size()) { + @Override + protected boolean lessThan(IntervalIterator a, IntervalIterator b) { + return a.start() < b.start() || (a.start() == b.start() && a.end() >= b.end()); + } + }; + this.subIterators = new IntervalIterator[subIterators.size()]; + + for (int i = 0; i < subIterators.size(); i++) { + this.subIterators[i] = subIterators.get(i); + } + } + + @Override + public int start() { + return start; + } + + @Override + public int end() { + return end; + } + + @Override + public int innerWidth() { + return innerEnd - innerStart + 1; + } + + @Override + public boolean reset(int doc) throws IOException { + this.queue.clear(); + this.queueEnd = start = end = innerEnd = innerStart = -1; + boolean positioned = true; + for (IntervalIterator subIterator : subIterators) { + positioned &= subIterator.reset(doc); + subIterator.nextInterval(); + queue.add(subIterator); + if (subIterator.end() > queueEnd) { + queueEnd = subIterator.end(); + innerEnd = subIterator.start(); + } + } + return positioned; + } + + void updateRightExtreme(IntervalIterator it) { + int itEnd = it.end(); + if (itEnd > queueEnd) { + queueEnd = itEnd; + innerEnd = it.start(); + } + } + + @Override + public int nextInterval() throws IOException { + while (this.queue.size() == 
subIterators.length && queue.top().start() == start) { + IntervalIterator it = queue.pop(); + if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { + queue.add(it); + updateRightExtreme(it); + } + } + if (this.queue.size() < subIterators.length) + return NO_MORE_INTERVALS; + do { + start = queue.top().start(); + innerStart = queue.top().end(); + end = queueEnd; + if (queue.top().end() == end) + return start; + IntervalIterator it = queue.pop(); + if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { + queue.add(it); + updateRightExtreme(it); + } + } while (this.queue.size() == subIterators.length && end == queueEnd); + return start; + } + + } + private static abstract class SingletonFunction extends IntervalFunction { private final String name; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index 6903626257b2..dfab7da1e3c7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -33,30 +33,6 @@ public final class IntervalQuery extends Query { - public static IntervalQuery ordered(String field, int width, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(0, width)); - } - - public static IntervalQuery ordered(String field, int minWidth, int maxWidth, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(minWidth, maxWidth)); - } - - public static IntervalQuery ordered(String field, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.ORDERED); - } - - public static IntervalQuery unordered(String field, int width, Query... 
subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(0, width)); - } - - public static IntervalQuery unordered(String field, int minWidth, int maxWidth, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(minWidth, maxWidth)); - } - - public static IntervalQuery unordered(String field, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.UNORDERED); - } - private final String field; private final List subQueries; private final IntervalFunction iteratorFunction; diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index e8e16744fe07..ab12bad0d655 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -18,6 +18,7 @@ package org.apache.lucene.search; import java.io.IOException; +import java.util.Arrays; import java.util.List; import org.apache.lucene.index.PostingsEnum; @@ -27,309 +28,37 @@ public final class Intervals { public static final int NO_MORE_INTERVALS = Integer.MAX_VALUE; - public static IntervalIterator widthFilter(IntervalIterator in, int minWidth, int maxWidth) { - return new IntervalFilter(in) { - @Override - protected boolean accept() { - int width = end() - start(); - return width >= minWidth && width <= maxWidth; - } - }; + public static Query orderedQuery(String field, int width, Query... 
subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(0, width)); } - public static IntervalIterator innerWidthFilter(IntervalIterator in, int minWidth, int maxWidth) { - return new IntervalFilter(in) { - @Override - protected boolean accept() { - int width = innerWidth(); - return width >= minWidth && width <= maxWidth; - } - }; + public static Query orderedQuery(String field, int minWidth, int maxWidth, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(minWidth, maxWidth)); } - public static IntervalIterator termIterator(PostingsEnum pe) { - return new TermIntervalIterator(pe); + public static Query orderedQuery(String field, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.ORDERED); } - private static class TermIntervalIterator implements IntervalIterator { - - public TermIntervalIterator(PostingsEnum pe) { - this.pe = pe; - } - - private final PostingsEnum pe; - - int upTo = -1; - int pos = -1; - - @Override - public int start() { - return pos; - } - - @Override - public int end() { - return pos; - } - - @Override - public int innerWidth() { - return 0; - } - - @Override - public boolean reset(int doc) throws IOException { - if (pe.docID() == doc) { - upTo = pe.freq(); - pos = -1; - return true; - } - upTo = -1; - return false; - } - - @Override - public int nextInterval() throws IOException { - if (upTo <= 0) { - return pos = NO_MORE_INTERVALS; - } - upTo--; - return pos = pe.nextPosition(); - } - - @Override - public String toString() { - return pe.docID() + "[" + pos + "]"; - } + public static Query unorderedQuery(String field, int width, Query... 
subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(0, width)); } - public static IntervalIterator orderedIntervalIterator(List subIterators) { - for (IntervalIterator it : subIterators) { - if (it == IntervalIterator.EMPTY) - return IntervalIterator.EMPTY; - } - return new OrderedIntervalIterator(subIterators); - } - - private static class OrderedIntervalIterator implements IntervalIterator { - - final List subIntervals; - - int start; - int end; - int innerWidth; - int i; - - private OrderedIntervalIterator(List subIntervals) { - this.subIntervals = subIntervals; - } - - @Override - public int start() { - return start; - } - - @Override - public int end() { - return end; - } - - @Override - public int innerWidth() { - return innerWidth; - } - - @Override - public boolean reset(int doc) throws IOException { - boolean positioned = true; - for (IntervalIterator it : subIntervals) { - positioned &= it.reset(doc); - } - subIntervals.get(0).nextInterval(); - i = 1; - start = end = innerWidth = Integer.MIN_VALUE; - return positioned; - } - - @Override - public int nextInterval() throws IOException { - start = end = NO_MORE_INTERVALS; - int b = Integer.MAX_VALUE; - while (true) { - while (true) { - if (subIntervals.get(i - 1).end() >= b) - return start; - if (i == subIntervals.size() || subIntervals.get(i).start() > subIntervals.get(i - 1).end()) - break; - do { - if (subIntervals.get(i).end() >= b || subIntervals.get(i).nextInterval() == NO_MORE_INTERVALS) - return start; - } - while (subIntervals.get(i).start() <= subIntervals.get(i - 1).end()); - i++; - } - start = subIntervals.get(0).start(); - end = subIntervals.get(subIntervals.size() - 1).end(); - b = subIntervals.get(subIntervals.size() - 1).start(); - innerWidth = b - subIntervals.get(0).end() - 1; - i = 1; - if (subIntervals.get(0).nextInterval() == NO_MORE_INTERVALS) - return start; - } - } + public static Query unorderedQuery(String field, int 
minWidth, int maxWidth, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(minWidth, maxWidth)); } - public static IntervalIterator unorderedIntervalIterator(List subIntervals) { - for (IntervalIterator it : subIntervals) { - if (it == IntervalIterator.EMPTY) - return IntervalIterator.EMPTY; - } - return new UnorderedIntervalIterator(subIntervals); + public static Query unorderedQuery(String field, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.UNORDERED); } - private static class UnorderedIntervalIterator implements IntervalIterator { - - private final PriorityQueue queue; - private final IntervalIterator[] subIterators; - - int start, end, innerStart, innerEnd, queueEnd; - - UnorderedIntervalIterator(List subIterators) { - this.queue = new PriorityQueue(subIterators.size()) { - @Override - protected boolean lessThan(IntervalIterator a, IntervalIterator b) { - return a.start() < b.start() || (a.start() == b.start() && a.end() >= b.end()); - } - }; - this.subIterators = new IntervalIterator[subIterators.size()]; - - for (int i = 0; i < subIterators.size(); i++) { - this.subIterators[i] = subIterators.get(i); - } - } - - @Override - public int start() { - return start; - } - - @Override - public int end() { - return end; - } - - @Override - public int innerWidth() { - return innerEnd - innerStart + 1; - } - - @Override - public boolean reset(int doc) throws IOException { - this.queue.clear(); - this.queueEnd = start = end = innerEnd = innerStart = -1; - boolean positioned = true; - for (IntervalIterator subIterator : subIterators) { - positioned &= subIterator.reset(doc); - subIterator.nextInterval(); - queue.add(subIterator); - if (subIterator.end() > queueEnd) { - queueEnd = subIterator.end(); - innerEnd = subIterator.start(); - } - } - return positioned; - } - - void updateRightExtreme(IntervalIterator it) { - int itEnd = it.end(); - 
if (itEnd > queueEnd) { - queueEnd = itEnd; - innerEnd = it.start(); - } - } - - @Override - public int nextInterval() throws IOException { - while (this.queue.size() == subIterators.length && queue.top().start() == start) { - IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != Intervals.NO_MORE_INTERVALS) { - queue.add(it); - updateRightExtreme(it); - } - } - if (this.queue.size() < subIterators.length) - return NO_MORE_INTERVALS; - do { - start = queue.top().start(); - innerStart = queue.top().end(); - end = queueEnd; - if (queue.top().end() == end) - return start; - IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != Intervals.NO_MORE_INTERVALS) { - queue.add(it); - updateRightExtreme(it); - } - } while (this.queue.size() == subIterators.length && end == queueEnd); - return start; - } - + public static Query nonOverlappingQuery(String field, Query minuend, Query subtrahend) { + return new ContainingIntervalQuery(field, minuend, subtrahend, IntervalDifferenceFunction.NON_OVERLAPPING); } - public static IntervalIterator nonOverlapping(IntervalIterator minuend, IntervalIterator subtrahend) { - return new NonOverlappingIterator(minuend, subtrahend); + public static Query notWithinQuery(String field, Query minuend, int positions, Query subtrahend) { + return new ContainingIntervalQuery(field, minuend, subtrahend, new IntervalDifferenceFunction.NotWithinFunction(positions)); } - private static class NonOverlappingIterator implements IntervalIterator { - - final IntervalIterator minuend; - final IntervalIterator subtrahend; - boolean subPositioned; - - private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtrahend) { - this.minuend = minuend; - this.subtrahend = subtrahend; - } - - @Override - public int start() { - return minuend.start(); - } - - @Override - public int end() { - return minuend.end(); - } - - @Override - public int innerWidth() { - return minuend.innerWidth(); - } - - @Override - 
public boolean reset(int doc) throws IOException { - subPositioned = subtrahend.reset(doc); - if (subPositioned) - subPositioned = subtrahend.nextInterval() != NO_MORE_INTERVALS; - return minuend.reset(doc); - } - - @Override - public int nextInterval() throws IOException { - if (subPositioned == false) - return minuend.nextInterval(); - while (minuend.nextInterval() != NO_MORE_INTERVALS) { - while (subtrahend.end() < minuend.start()) { - if (subtrahend.nextInterval() == NO_MORE_INTERVALS) { - subPositioned = false; - return minuend.start(); - } - } - if (subtrahend.start() > minuend.end()) - return minuend.start(); - } - return NO_MORE_INTERVALS; - } - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 9d2c7192fc4d..ef9eeeabc026 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -24,6 +24,8 @@ import org.apache.lucene.index.SlowImpactsEnum; import org.apache.lucene.index.TermsEnum; +import static org.apache.lucene.search.Intervals.NO_MORE_INTERVALS; + /** Expert: A Scorer for documents matching a Term. */ final class TermScorer extends Scorer { @@ -130,7 +132,7 @@ public DocIdSetIterator iterator() { @Override public IntervalIterator intervals(String field) { if (this.field.equals(field)) { - return Intervals.termIterator(postingsEnum); + return new TermIntervalIterator(postingsEnum); } return null; } @@ -159,4 +161,56 @@ public void setMinCompetitiveScore(float minScore) { /** Returns a string representation of this TermScorer. 
*/ @Override public String toString() { return "scorer(" + weight + ")[" + super.toString() + "]"; } + + private static class TermIntervalIterator implements IntervalIterator { + + public TermIntervalIterator(PostingsEnum pe) { + this.pe = pe; + } + + private final PostingsEnum pe; + + int upTo = -1; + int pos = -1; + + @Override + public int start() { + return pos; + } + + @Override + public int end() { + return pos; + } + + @Override + public int innerWidth() { + return 0; + } + + @Override + public boolean reset(int doc) throws IOException { + if (pe.docID() == doc) { + upTo = pe.freq(); + pos = -1; + return true; + } + upTo = -1; + return false; + } + + @Override + public int nextInterval() throws IOException { + if (upTo <= 0) { + return pos = NO_MORE_INTERVALS; + } + upTo--; + return pos = pe.nextPosition(); + } + + @Override + public String toString() { + return pe.docID() + "[" + pos + "]"; + } + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 110b03df3864..fa30610d7875 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -73,7 +73,7 @@ private void checkHits(Query query, int[] results) throws IOException { public void testScoring() throws IOException { PhraseQuery pq = new PhraseQuery.Builder().add(new Term(field, "w2")).add(new Term(field, "w3")).build(); - Query equiv = IntervalQuery.ordered(field, 0, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))); + Query equiv = Intervals.orderedQuery(field, 0, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))); TopDocs td1 = searcher.search(pq, 10); TopDocs td2 = searcher.search(equiv, 10); @@ -85,28 +85,28 @@ public void testScoring() throws IOException { } public void testOrderedNearQueryWidth0() throws IOException { - 
checkHits(IntervalQuery.ordered(field, 0, new TermQuery(new Term(field, "w1")), + checkHits(Intervals.orderedQuery(field, 0, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new int[]{0}); } public void testOrderedNearQueryWidth1() throws IOException { - checkHits(IntervalQuery.ordered(field, 1, new TermQuery(new Term(field, "w1")), + checkHits(Intervals.orderedQuery(field, 1, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new int[]{0, 1, 2, 5}); } public void testOrderedNearQueryWidth2() throws IOException { - checkHits(IntervalQuery.ordered(field, 2, new TermQuery(new Term(field, "w1")), + checkHits(Intervals.orderedQuery(field, 2, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new int[]{0, 1, 2, 3, 5}); } public void testNestedOrderedNearQuery() throws IOException { // onear/1(w1, onear/2(w2, w3)) - Query q = IntervalQuery.ordered(field, 1, + Query q = Intervals.orderedQuery(field, 1, new TermQuery(new Term(field, "w1")), - IntervalQuery.ordered(field, 2, + Intervals.orderedQuery(field, 2, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))) ); @@ -115,20 +115,20 @@ public void testNestedOrderedNearQuery() throws IOException { } public void testUnorderedQuery() throws IOException { - Query q = IntervalQuery.unordered(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))); + Query q = Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))); checkHits(q, new int[]{0, 1, 2, 3, 5}); } public void testNotContainingQuery() throws IOException { - Query q = ContainingIntervalQuery.nonOverlapping(field, - IntervalQuery.unordered(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), + Query q = Intervals.nonOverlappingQuery(field, + Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new 
TermQuery(new Term(field, "w3"))); checkHits(q, new int[]{0, 2, 4, 5}); } public void testNotWithinQuery() throws IOException { - Query q = ContainingIntervalQuery.notWithin(field, new TermQuery(new Term(field, "w1")), 1, + Query q = Intervals.notWithinQuery(field, new TermQuery(new Term(field, "w1")), 1, new TermQuery(new Term(field, "w2"))); checkHits(q, new int[]{ 1, 2, 3 }); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 1c2be22c840a..e6169823c829 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -126,7 +126,7 @@ public void testTermQueryIntervals() throws IOException { } public void testOrderedNearIntervals() throws IOException { - checkIntervals(IntervalQuery.ordered("field1", 100, + checkIntervals(Intervals.orderedQuery("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), "field1", 3, new int[][]{ {}, @@ -139,7 +139,7 @@ public void testOrderedNearIntervals() throws IOException { } public void testUnorderedNearIntervals() throws IOException { - checkIntervals(IntervalQuery.unordered("field1", 100, + checkIntervals(Intervals.unorderedQuery("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), "field1", 4, new int[][]{ {}, @@ -166,7 +166,7 @@ public void testIntervalDisjunction() throws IOException { } public void testNesting() throws IOException { - checkIntervals(IntervalQuery.unordered("field1", 100, + checkIntervals(Intervals.unorderedQuery("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "porridge")), new BooleanQuery.Builder() From 77161885359dc12ed603563c81b803e291fd11c4 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Sun, 25 Feb 2018 12:00:12 +0000 Subject: [PATCH 12/83] Tests for containing/contained_by 
queries --- ...n.java => DifferenceIntervalFunction.java} | 130 +++++++++++++----- ...uery.java => DifferenceIntervalQuery.java} | 12 +- .../lucene/search/IntervalFunction.java | 100 ++++++++++++++ .../apache/lucene/search/IntervalScorer.java | 2 +- .../org/apache/lucene/search/Intervals.java | 24 +++- .../lucene/search/TestIntervalQuery.java | 46 ++++++- 6 files changed, 262 insertions(+), 52 deletions(-) rename lucene/core/src/java/org/apache/lucene/search/{IntervalDifferenceFunction.java => DifferenceIntervalFunction.java} (54%) rename lucene/core/src/java/org/apache/lucene/search/{ContainingIntervalQuery.java => DifferenceIntervalQuery.java} (93%) diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java similarity index 54% rename from lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java rename to lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index 6b5d58c9fe8d..a0a19fa81307 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -22,7 +22,7 @@ import static org.apache.lucene.search.Intervals.NO_MORE_INTERVALS; -public abstract class IntervalDifferenceFunction { +public abstract class DifferenceIntervalFunction { @Override public abstract int hashCode(); @@ -35,70 +35,89 @@ public abstract class IntervalDifferenceFunction { public abstract IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend); - public static final IntervalDifferenceFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { + public static final DifferenceIntervalFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - return nonOverlapping(minuend, subtrahend); 
+ return new NonOverlappingIterator(minuend, subtrahend); } }; - public static IntervalIterator nonOverlapping(IntervalIterator minuend, IntervalIterator subtrahend) { - return new NonOverlappingIterator(minuend, subtrahend); - } + public static final DifferenceIntervalFunction NOT_CONTAINING = new SingletonFunction("NOT_CONTAINING") { + @Override + public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { + return new NotContainingIterator(minuend, subtrahend); + } + }; - private static class NonOverlappingIterator implements IntervalIterator { + public static final DifferenceIntervalFunction NOT_CONTAINED_BY = new SingletonFunction("NOT_CONTAINED_BY") { + @Override + public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { + return new NotContainedByIterator(minuend, subtrahend); + } + }; - final IntervalIterator minuend; - final IntervalIterator subtrahend; - boolean subPositioned; + private static abstract class RelativeIterator implements IntervalIterator { - private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtrahend) { - this.minuend = minuend; - this.subtrahend = subtrahend; + final IntervalIterator a; + final IntervalIterator b; + + boolean bpos; + + RelativeIterator(IntervalIterator a, IntervalIterator b) { + this.a = a; + this.b = b; } @Override public int start() { - return minuend.start(); + return a.start(); } @Override public int end() { - return minuend.end(); + return a.end(); } @Override public int innerWidth() { - return minuend.innerWidth(); + return a.innerWidth(); } @Override public boolean reset(int doc) throws IOException { - subPositioned = subtrahend.reset(doc); - if (subPositioned) - subPositioned = subtrahend.nextInterval() != NO_MORE_INTERVALS; - return minuend.reset(doc); + bpos = b.reset(doc); + if (bpos) + bpos = b.nextInterval() != NO_MORE_INTERVALS; + return a.reset(doc); + } + + } + + private static class NonOverlappingIterator extends 
RelativeIterator { + + private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtrahend) { + super(minuend, subtrahend); } @Override public int nextInterval() throws IOException { - if (subPositioned == false) - return minuend.nextInterval(); - while (minuend.nextInterval() != NO_MORE_INTERVALS) { - while (subtrahend.end() < minuend.start()) { - if (subtrahend.nextInterval() == NO_MORE_INTERVALS) { - subPositioned = false; - return minuend.start(); + if (bpos == false) + return a.nextInterval(); + while (a.nextInterval() != NO_MORE_INTERVALS) { + while (b.end() < a.start()) { + if (b.nextInterval() == NO_MORE_INTERVALS) { + bpos = false; + return a.start(); } } - if (subtrahend.start() > minuend.end()) - return minuend.start(); + if (b.start() > a.end()) + return a.start(); } return NO_MORE_INTERVALS; } } - public static class NotWithinFunction extends IntervalDifferenceFunction { + public static class NotWithinFunction extends DifferenceIntervalFunction { private final int positions; @@ -161,11 +180,58 @@ public int nextInterval() throws IOException { } } - private static abstract class SingletonFunction extends IntervalDifferenceFunction { + private static class NotContainingIterator extends RelativeIterator { + + private NotContainingIterator(IntervalIterator minuend, IntervalIterator subtrahend) { + super(minuend, subtrahend); + } + + @Override + public int nextInterval() throws IOException { + if (bpos == false) + return a.nextInterval(); + while (a.nextInterval() != NO_MORE_INTERVALS) { + while (b.start() < a.start() && b.end() < a.end()) { + if (b.nextInterval() == NO_MORE_INTERVALS) { + bpos = false; + return a.start(); + } + } + if (b.start() > a.end()) + return a.start(); + } + return NO_MORE_INTERVALS; + } + + } + + private static class NotContainedByIterator extends RelativeIterator { + + NotContainedByIterator(IntervalIterator a, IntervalIterator b) { + super(a, b); + } + + @Override + public int nextInterval() throws IOException { 
+ if (bpos == false) + return a.nextInterval(); + while (a.nextInterval() != NO_MORE_INTERVALS) { + while (b.end() < a.end()) { + if (b.nextInterval() == NO_MORE_INTERVALS) + return a.start(); + } + if (a.start() < b.start()) + return a.start(); + } + return NO_MORE_INTERVALS; + } + } + + private static abstract class SingletonFunction extends DifferenceIntervalFunction { private final String name; - protected SingletonFunction(String name) { + SingletonFunction(String name) { this.name = name; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java similarity index 93% rename from lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java rename to lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java index 8d564942e864..a971f909295f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java @@ -27,14 +27,14 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.Similarity; -public class ContainingIntervalQuery extends Query { +public class DifferenceIntervalQuery extends Query { private final Query minuend; private final Query subtrahend; - private final IntervalDifferenceFunction function; + private final DifferenceIntervalFunction function; private final String field; - protected ContainingIntervalQuery(String field, Query minuend, Query subtrahend, IntervalDifferenceFunction function) { + protected DifferenceIntervalQuery(String field, Query minuend, Query subtrahend, DifferenceIntervalFunction function) { this.minuend = minuend; this.subtrahend = subtrahend; this.function = function; @@ -59,7 +59,7 @@ public Query rewrite(IndexReader reader) throws IOException { Query rewrittenMinuend = minuend.rewrite(reader); Query rewrittenSubtrahend = subtrahend.rewrite(reader); if 
(rewrittenMinuend != minuend || rewrittenSubtrahend != subtrahend) { - return new ContainingIntervalQuery(field, rewrittenMinuend, rewrittenSubtrahend, function); + return new DifferenceIntervalQuery(field, rewrittenMinuend, rewrittenSubtrahend, function); } return this; } @@ -68,7 +68,7 @@ public Query rewrite(IndexReader reader) throws IOException { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - ContainingIntervalQuery that = (ContainingIntervalQuery) o; + DifferenceIntervalQuery that = (DifferenceIntervalQuery) o; return Objects.equals(minuend, that.minuend) && Objects.equals(subtrahend, that.subtrahend) && Objects.equals(function, that.function); @@ -89,7 +89,7 @@ private class IntervalDifferenceWeight extends Weight { private IntervalDifferenceWeight(Weight minuendWeight, Weight subtrahendWeight, ScoreMode scoreMode, Similarity similarity, Similarity.SimScorer simScorer) { - super(ContainingIntervalQuery.this); + super(DifferenceIntervalQuery.this); this.minuendWeight = minuendWeight; this.subtrahendWeight = subtrahendWeight; this.scoreMode = scoreMode; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 5a94a25d0a33..ef5d55b3b638 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -298,6 +298,106 @@ public int nextInterval() throws IOException { } + public static final IntervalFunction CONTAINING = new SingletonFunction("CONTAINING") { + @Override + public IntervalIterator apply(List iterators) { + if (iterators.size() != 2) + throw new IllegalStateException("CONTAINING function requires two iterators"); + IntervalIterator a = iterators.get(0); + IntervalIterator b = iterators.get(1); + return new IntervalIterator() { + + boolean bpos; + + @Override + public int start() { + 
return a.start(); + } + + @Override + public int end() { + return a.end(); + } + + @Override + public int innerWidth() { + return a.innerWidth(); + } + + @Override + public boolean reset(int doc) throws IOException { + bpos = b.reset(doc); + return a.reset(doc); + } + + @Override + public int nextInterval() throws IOException { + if (bpos == false) + return NO_MORE_INTERVALS; + while (a.nextInterval() != NO_MORE_INTERVALS) { + while (b.start() < a.start() && b.end() < a.end()) { + if (b.nextInterval() == NO_MORE_INTERVALS) + return NO_MORE_INTERVALS; + } + if (a.start() <= b.start() && a.end() >= b.end()) + return a.start(); + } + return NO_MORE_INTERVALS; + } + }; + } + }; + + public static final IntervalFunction CONTAINED_BY = new SingletonFunction("CONTAINED_BY") { + @Override + public IntervalIterator apply(List iterators) { + if (iterators.size() != 2) + throw new IllegalStateException("CONTAINED_BY function requires two iterators"); + IntervalIterator a = iterators.get(0); + IntervalIterator b = iterators.get(1); + return new IntervalIterator() { + + boolean bpos; + + @Override + public int start() { + return a.start(); + } + + @Override + public int end() { + return a.end(); + } + + @Override + public int innerWidth() { + return a.innerWidth(); + } + + @Override + public boolean reset(int doc) throws IOException { + bpos = b.reset(doc); + return a.reset(doc); + } + + @Override + public int nextInterval() throws IOException { + if (bpos == false) + return NO_MORE_INTERVALS; + while (a.nextInterval() != NO_MORE_INTERVALS) { + while (b.end() < a.end()) { + if (b.nextInterval() == NO_MORE_INTERVALS) + return NO_MORE_INTERVALS; + } + if (b.start() <= a.start()) + return a.start(); + } + return NO_MORE_INTERVALS; + } + }; + } + }; + private static abstract class SingletonFunction extends IntervalFunction { private final String name; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java 
b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index fb9476350618..9e804dde549e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -99,7 +99,7 @@ public int innerWidth() { public boolean reset(int doc) throws IOException { // inner iterator already reset() in TwoPhaseIterator.matches() started = false; - return true; + return doc == docID(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index ab12bad0d655..43ffb74a9bee 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -17,12 +17,7 @@ package org.apache.lucene.search; -import java.io.IOException; import java.util.Arrays; -import java.util.List; - -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.util.PriorityQueue; public final class Intervals { @@ -53,12 +48,27 @@ public static Query unorderedQuery(String field, Query... 
subQueries) { } public static Query nonOverlappingQuery(String field, Query minuend, Query subtrahend) { - return new ContainingIntervalQuery(field, minuend, subtrahend, IntervalDifferenceFunction.NON_OVERLAPPING); + return new DifferenceIntervalQuery(field, minuend, subtrahend, DifferenceIntervalFunction.NON_OVERLAPPING); } public static Query notWithinQuery(String field, Query minuend, int positions, Query subtrahend) { - return new ContainingIntervalQuery(field, minuend, subtrahend, new IntervalDifferenceFunction.NotWithinFunction(positions)); + return new DifferenceIntervalQuery(field, minuend, subtrahend, new DifferenceIntervalFunction.NotWithinFunction(positions)); + } + + public static Query notContainingQuery(String field, Query minuend, Query subtrahend) { + return new DifferenceIntervalQuery(field, minuend, subtrahend, DifferenceIntervalFunction.NOT_CONTAINING); + } + + public static Query containingQuery(String field, Query big, Query small) { + return new IntervalQuery(field, Arrays.asList(big, small), IntervalFunction.CONTAINING); + } + + public static Query notContainedByQuery(String field, Query small, Query big) { + return new DifferenceIntervalQuery(field, small, big, DifferenceIntervalFunction.NOT_CONTAINED_BY); } + public static Query containedByQuery(String field, Query small, Query big) { + return new IntervalQuery(field, Arrays.asList(small, big), IntervalFunction.CONTAINED_BY); + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index fa30610d7875..8de0afa3ac9a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -61,10 +61,10 @@ public void tearDown() throws Exception { private String[] docFields = { "w1 w2 w3 w4 w5", "w1 w3 w2 w3", - "w1 xx w2 yy w3", + "w1 xx w2 w4 yy w3", "w1 w3 xx w2 yy w3", "w2 w1", - "w2 w1 w3 w2" + "w2 w1 w3 
w2 w4" }; private void checkHits(Query query, int[] results) throws IOException { @@ -119,12 +119,12 @@ public void testUnorderedQuery() throws IOException { checkHits(q, new int[]{0, 1, 2, 3, 5}); } - public void testNotContainingQuery() throws IOException { + public void testNonOverlappingQuery() throws IOException { Query q = Intervals.nonOverlappingQuery(field, - Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), - new TermQuery(new Term(field, "w3"))); + Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))), + Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w4")))); - checkHits(q, new int[]{0, 2, 4, 5}); + checkHits(q, new int[]{1, 3, 5}); } public void testNotWithinQuery() throws IOException { @@ -133,4 +133,38 @@ public void testNotWithinQuery() throws IOException { checkHits(q, new int[]{ 1, 2, 3 }); } + public void testNotContainingQuery() throws IOException { + Query q = Intervals.notContainingQuery(field, + Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), + new TermQuery(new Term(field, "w3"))); + + checkHits(q, new int[]{ 0, 2, 4, 5 }); + } + + public void testContainingQuery() throws IOException { + Query q = Intervals.containingQuery(field, + Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), + new TermQuery(new Term(field, "w3"))); + + checkHits(q, new int[]{ 1, 3, 5 }); + } + + public void testContainedByQuery() throws IOException { + Query q = Intervals.containedByQuery(field, + new TermQuery(new Term(field, "w3")), + Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2")))); + checkHits(q, new int[]{ 1, 3, 5 }); + } + + public void testNotContainedByQuery() throws IOException { + Query q = 
Intervals.notContainedByQuery(field, + new TermQuery(new Term(field, "w2")), + Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w4")))); + checkHits(q, new int[]{ 1, 3, 4, 5 }); + } + // contained-by + // not-contained-by + + // TODO: Overlapping + } From 855d07ee7cef804a8235ad0594cefbb1ed95b85a Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 27 Feb 2018 16:50:48 +0000 Subject: [PATCH 13/83] Add intervals to exact phrase scorer --- .../lucene/search/CachedIntervalIterator.java | 64 +++++++++++++ .../lucene/search/ExactPhraseScorer.java | 91 ++++++++++++++++++- .../apache/lucene/search/IntervalScorer.java | 35 +------ .../lucene/search/MultiPhraseQuery.java | 2 +- .../org/apache/lucene/search/PhraseQuery.java | 2 +- .../lucene/search/TestIntervalQuery.java | 7 ++ .../apache/lucene/search/TestIntervals.java | 13 +++ 7 files changed, 174 insertions(+), 40 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java diff --git a/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java new file mode 100644 index 000000000000..e035d33594d2 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; + +class CachedIntervalIterator implements IntervalIterator { + + final IntervalIterator in; + final Scorer scorer; + + boolean started = false; + + CachedIntervalIterator(IntervalIterator in, Scorer scorer) { + this.in = in; + this.scorer = scorer; + } + + @Override + public int start() { + return in.start(); + } + + @Override + public int end() { + return in.end(); + } + + @Override + public int innerWidth() { + return in.innerWidth(); + } + + @Override + public boolean reset(int doc) throws IOException { + // inner iterator already reset() in TwoPhaseIterator.matches() + started = false; + return doc == scorer.docID(); + } + + @Override + public int nextInterval() throws IOException { + if (started == false) { + started = true; + return start(); + } + return in.nextInterval(); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index d68f8557dd10..5aec1e64ff62 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -38,6 +38,7 @@ public PostingsAndPosition(PostingsEnum postings, int offset) { private final DocIdSetIterator conjunction; private final PostingsAndPosition[] postings; + private final String field; private int freq; @@ -46,13 +47,17 @@ public PostingsAndPosition(PostingsEnum postings, int offset) { private float matchCost; private float minCompetitiveScore; 
- ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, + private final IntervalIterator intervals; + + ExactPhraseScorer(Weight weight, String field, PhraseQuery.PostingsAndFreq[] postings, LeafSimScorer docScorer, ScoreMode scoreMode, float matchCost) throws IOException { super(weight); this.docScorer = docScorer; this.needsScores = scoreMode.needsScores(); this.needsTotalHitCount = scoreMode != ScoreMode.TOP_SCORES; + this.field = field; + this.intervals = new ExactPhraseIntervals(); List iterators = new ArrayList<>(); List postingsAndPositions = new ArrayList<>(); @@ -86,7 +91,9 @@ public boolean matches() throws IOException { return false; } } - return phraseFreq() > 0; + freq = -1; + intervals.reset(docID()); + return intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; } @Override @@ -106,7 +113,8 @@ public String toString() { return "ExactPhraseScorer(" + weight + ")"; } - final int freq() { + final int freq() throws IOException { + ensureFreq(); return freq; } @@ -117,6 +125,7 @@ public int docID() { @Override public float score() throws IOException { + ensureFreq(); return docScorer.score(docID(), freq); } @@ -127,7 +136,18 @@ public float getMaxScore(int upTo) throws IOException { @Override public IntervalIterator intervals(String field) { - return null; // nocommit + if (this.field.equals(field) == false) + return null; + return new CachedIntervalIterator(intervals, this); + } + + private void ensureFreq() throws IOException { + if (freq == -1) { + freq = 1; + while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS) { + freq++; + } + } } /** Advance the given pos enum to the first doc on or after {@code target}. 
@@ -145,6 +165,69 @@ private static boolean advancePosition(PostingsAndPosition posting, int target) return true; } + private class ExactPhraseIntervals implements IntervalIterator { + + @Override + public int start() { + return postings[0].pos; + } + + @Override + public int end() { + return postings[postings.length - 1].pos; + } + + @Override + public int innerWidth() { + return 0; + } + + @Override + public boolean reset(int doc) throws IOException { + if (conjunction.docID() != doc) + return false; + for (PostingsAndPosition posting : postings) { + posting.freq = posting.postings.freq(); + posting.pos = -1; + posting.upTo = 0; + } + return true; + } + + @Override + public int nextInterval() throws IOException { + final PostingsAndPosition lead = postings[0]; + if (lead.upTo == lead.freq) + return Intervals.NO_MORE_INTERVALS; + + lead.pos = lead.postings.nextPosition(); + lead.upTo += 1; + + advanceHead: + while (true) { + final int phrasePos = lead.pos - lead.offset; + for (int j = 1; j < postings.length; ++j) { + final PostingsAndPosition posting = postings[j]; + final int expectedPos = phrasePos + posting.offset; + + // advance up to the same position as the lead + if (advancePosition(posting, expectedPos) == false) { + return Intervals.NO_MORE_INTERVALS; + } + + if (posting.pos != expectedPos) { // we advanced too far + if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) { + continue advanceHead; + } else { + return Intervals.NO_MORE_INTERVALS; + } + } + } + return lead.pos; + } + } + } + private int phraseFreq() throws IOException { // reset state final PostingsAndPosition[] postings = this.postings; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index 9e804dde549e..2ac445efb4f4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -77,40 
+77,7 @@ private void ensureFreq() throws IOException { @Override public IntervalIterator intervals(String field) { if (this.field.equals(field)) - return new IntervalIterator() { - boolean started = false; - - @Override - public int start() { - return intervals.start(); - } - - @Override - public int end() { - return intervals.end(); - } - - @Override - public int innerWidth() { - return intervals.innerWidth(); - } - - @Override - public boolean reset(int doc) throws IOException { - // inner iterator already reset() in TwoPhaseIterator.matches() - started = false; - return doc == docID(); - } - - @Override - public int nextInterval() throws IOException { - if (started == false) { - started = true; - return start(); - } - return intervals.nextInterval(); - } - }; + return new CachedIntervalIterator(intervals, this); return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 2b6bde8a1daa..cae6cb3c67e9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -292,7 +292,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { } if (slop == 0) { - return new ExactPhraseScorer(this, postingsFreqs, + return new ExactPhraseScorer(this, field, postingsFreqs, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE), scoreMode, totalMatchCost); } else { diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index 360b0175061e..4e0fb43f37e8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -435,7 +435,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { } if (slop == 0) { // optimize exact case - return new 
ExactPhraseScorer(this, postingsFreqs, + return new ExactPhraseScorer(this, field, postingsFreqs, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE), scoreMode, totalMatchCost); } else { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 8de0afa3ac9a..23aa37f655b3 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -114,6 +114,13 @@ public void testNestedOrderedNearQuery() throws IOException { checkHits(q, new int[]{0, 1, 2}); } + public void testNearPhraseQuery() throws IOException { + Query q = Intervals.unorderedQuery(field, + new PhraseQuery.Builder().add(new Term(field, "w3")).add(new Term(field, "w2")).build(), + new TermQuery(new Term(field, "w4"))); + checkHits(q, new int[]{ 5 }); + } + public void testUnorderedQuery() throws IOException { Query q = Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))); checkHits(q, new int[]{0, 1, 2, 3, 5}); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index e6169823c829..8dc190c9e8cd 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -125,6 +125,19 @@ public void testTermQueryIntervals() throws IOException { }); } + public void testExactPhraseQueryIntervals() throws IOException { + checkIntervals(new PhraseQuery.Builder() + .add(new Term("field1", "pease")) + .add(new Term("field1", "porridge")).build(), "field1", 3, new int[][]{ + {}, + { 0, 1, 3, 4, 6, 7 }, + { 0, 1, 3, 4, 6, 7 }, + {}, + { 0, 1, 3, 4, 6, 7 }, + {} + }); + } + public void testOrderedNearIntervals() throws IOException { 
checkIntervals(Intervals.orderedQuery("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), From 8224bf9c9cddc720a5fb42177277df8fe1a7c0d6 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 27 Feb 2018 18:39:58 +0000 Subject: [PATCH 14/83] Add intervals to sloppy phrase scorer --- .../lucene/search/MultiPhraseQuery.java | 2 +- .../apache/lucene/search/PhrasePositions.java | 6 +- .../org/apache/lucene/search/PhraseQuery.java | 2 +- .../lucene/search/SloppyPhraseScorer.java | 110 ++++++++++++++++-- .../lucene/search/TestIntervalQuery.java | 7 ++ .../apache/lucene/search/TestIntervals.java | 17 +++ 6 files changed, 133 insertions(+), 11 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index cae6cb3c67e9..7df670d73f19 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -296,7 +296,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE), scoreMode, totalMatchCost); } else { - return new SloppyPhraseScorer(this, postingsFreqs, slop, + return new SloppyPhraseScorer(this, field, postingsFreqs, slop, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE), scoreMode.needsScores(), totalMatchCost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java index 640cd5f20e45..d39cec2293e9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java @@ -24,7 +24,8 @@ * Position of a term in a document that takes into account the term offset within the phrase. 
*/ final class PhrasePositions { - int position; // position in doc + int realPosition; // position in doc + int position; // position in phrase int count; // remaining pos in this doc int offset; // position in phrase final int ord; // unique across all PhrasePositions instances @@ -54,7 +55,8 @@ final void firstPosition() throws IOException { */ final boolean nextPosition() throws IOException { if (count-- > 0) { // read subsequent pos's - position = postings.nextPosition() - offset; + realPosition = postings.nextPosition(); + position = realPosition - offset; return true; } else return false; diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index 4e0fb43f37e8..a4ff6150815d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -439,7 +439,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE), scoreMode, totalMatchCost); } else { - return new SloppyPhraseScorer(this, postingsFreqs, slop, + return new SloppyPhraseScorer(this, field, postingsFreqs, slop, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE), scoreMode.needsScores(), totalMatchCost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index 20a375955410..a103af73fa7e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -32,6 +32,8 @@ final class SloppyPhraseScorer extends Scorer { private final DocIdSetIterator conjunction; private final PhrasePositions[] phrasePositions; + private final IntervalIterator intervals; + private final String field; private float sloppyFreq; 
//phrase frequency in current doc as computed by phraseFreq(). @@ -40,6 +42,8 @@ final class SloppyPhraseScorer extends Scorer { private final int slop; private final int numPostings; private final PhraseQueue pq; // for advancing min position + + private int start, currentEnd, nextEnd; private int end; // current largest phrase position @@ -53,13 +57,15 @@ final class SloppyPhraseScorer extends Scorer { final boolean needsScores; private final float matchCost; - SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, + SloppyPhraseScorer(Weight weight, String field, PhraseQuery.PostingsAndFreq[] postings, int slop, LeafSimScorer docScorer, boolean needsScores, float matchCost) { super(weight); this.docScorer = docScorer; this.needsScores = needsScores; this.slop = slop; + this.field = field; + this.intervals = new SloppyIntervalIterator(); this.numPostings = postings==null ? 0 : postings.length; pq = new PhraseQueue(postings.length); DocIdSetIterator[] iterators = new DocIdSetIterator[postings.length]; @@ -73,6 +79,74 @@ final class SloppyPhraseScorer extends Scorer { this.matchCost = matchCost; } + private class SloppyIntervalIterator implements IntervalIterator { + + @Override + public int start() { + return start; + } + + @Override + public int end() { + return currentEnd; + } + + @Override + public int innerWidth() { + return currentEnd - start; + } + + @Override + public boolean reset(int doc) throws IOException { + start = currentEnd = nextEnd = -1; + return initPhrasePositions(); + } + + @Override + public int nextInterval() throws IOException { + if (pq.size() < phrasePositions.length) + return Intervals.NO_MORE_INTERVALS; + currentEnd = nextEnd; + PhrasePositions pp = pq.pop(); + start = pp.realPosition; + int matchLength = end - pp.position; + int next = pq.top().position; + int nextStart = pq.top().realPosition; + while (advancePP(pp)) { + if (hasRpts && !advanceRpts(pp)) { + break; // pps exhausted + } + if (pp.position > next) { // 
done minimizing current match-length + if (matchLength <= slop) { + pq.add(pp); + if (pp.realPosition > nextEnd) + nextEnd = pp.realPosition; + return start; + } + pq.add(pp); + pp = pq.pop(); + next = pq.top().position; + matchLength = end - pp.position; + } else { + int matchLength2 = end - pp.position; + if (matchLength2 < matchLength) { + matchLength = matchLength2; + } + if (pp.realPosition > nextStart) { + start = nextStart; + } + else { + start = pp.realPosition; + } + } + } + if (matchLength <= slop) { + return start; + } + return Intervals.NO_MORE_INTERVALS; + } + } + /** * Score a candidate doc for all slop-valid position-combinations (matches) * encountered while traversing/hopping the PhrasePositions. @@ -242,6 +316,9 @@ private void initSimple() throws IOException { if (pp.position > end) { end = pp.position; } + if (pp.realPosition > nextEnd) { + nextEnd = pp.realPosition; + } pq.add(pp); } } @@ -271,6 +348,9 @@ private void fillQueue() { if (pp.position > end) { end = pp.position; } + if (pp.realPosition > nextEnd) { + nextEnd = pp.realPosition; + } pq.add(pp); } } @@ -515,11 +595,13 @@ private HashMap termGroups(LinkedHashMap tord, Array return tg; } - int freq() { + int freq() throws IOException { + ensureFreq(); return numMatches; } - float sloppyFreq() { + float sloppyFreq() throws IOException { + ensureFreq(); return sloppyFreq; } @@ -544,7 +626,17 @@ float sloppyFreq() { // } // } - + private void ensureFreq() throws IOException { + if (sloppyFreq == -1) { + numMatches = 1; + sloppyFreq = intervals.score(); + while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS) { + sloppyFreq += intervals.score(); + numMatches++; + } + } + } + @Override public int docID() { return conjunction.docID(); @@ -552,6 +644,7 @@ public int docID() { @Override public float score() throws IOException { + ensureFreq(); return docScorer.score(docID(), sloppyFreq); } @@ -565,7 +658,9 @@ public float getMaxScore(int upTo) throws IOException { @Override public 
IntervalIterator intervals(String field) { - return null; // nocommit. this will be fun + if (this.field.equals(field)) + return new CachedIntervalIterator(intervals, this); + return null; } @Override @@ -573,8 +668,9 @@ public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(conjunction) { @Override public boolean matches() throws IOException { - sloppyFreq = phraseFreq(); // check for phrase - return sloppyFreq != 0F; + sloppyFreq = -1; + intervals.reset(docID()); + return intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; } @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 23aa37f655b3..535d31dd0fa2 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -121,6 +121,13 @@ public void testNearPhraseQuery() throws IOException { checkHits(q, new int[]{ 5 }); } + public void testSloppyPhraseQuery() throws IOException { + Query q = Intervals.unorderedQuery(field, + new PhraseQuery.Builder().add(new Term(field, "w3")).add(new Term(field, "w2")).setSlop(2).build(), + new TermQuery(new Term(field, "w4"))); + checkHits(q, new int[]{ 0, 5 }); + } + public void testUnorderedQuery() throws IOException { Query q = Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))); checkHits(q, new int[]{0, 1, 2, 3, 5}); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 8dc190c9e8cd..d86ac68c774e 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -99,6 +99,7 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i if (intervals.reset(doc)) { int i = 0, pos; while ((pos 
= intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { + //System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); assertEquals(expected[id][i + 1], intervals.end()); @@ -138,6 +139,22 @@ public void testExactPhraseQueryIntervals() throws IOException { }); } + public void testSloppyPhraseQueryIntervals() throws IOException { + checkIntervals(new PhraseQuery.Builder() + .add(new Term("field1", "pease")) + .add(new Term("field1", "porridge")) + .add(new Term("field1", "hot")) + .setSlop(3).build(), "field1", 3, new int[][]{ + {}, + { 0, 2, 1, 3, 2, 4 }, + { 0, 5, 3, 5, 3, 7, 5, 7 }, + {}, + { 0, 2, 1, 3, 2, 4 }, + {} + } + ); + } + public void testOrderedNearIntervals() throws IOException { checkIntervals(Intervals.orderedQuery("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), From 38e422aa2d8534ff2a11623913dee7920498f3c6 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 28 Feb 2018 19:28:05 +0000 Subject: [PATCH 15/83] Add test for boolean exclusion combinations --- .../apache/lucene/search/BooleanQuery.java | 2 +- .../lucene/search/CachedIntervalIterator.java | 22 +------ .../search/DisjunctionIntervalIterator.java | 22 ++----- .../lucene/search/DisjunctionScorer.java | 19 ++++-- .../lucene/search/FilterIntervalIterator.java | 59 +++++++++++++++++++ .../apache/lucene/search/ReqExclScorer.java | 9 ++- .../apache/lucene/search/ReqOptSumScorer.java | 8 ++- .../apache/lucene/search/TestIntervals.java | 33 +++++++++-- 8 files changed, 124 insertions(+), 50 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index f52df9fb9cd8..9d89fbed2d86 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -201,7 +201,7 @@ private BooleanQuery rewriteNoScoring() { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { BooleanQuery query = this; - if (scoreMode.needsScores() == false) { + if (scoreMode.needsScores() == false && scoreMode.useQueryCache()) { query = rewriteNoScoring(); } return new BooleanWeight(query, searcher, scoreMode, boost); diff --git a/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java index e035d33594d2..5e5f68a365ea 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java @@ -19,33 +19,17 @@ import java.io.IOException; -class CachedIntervalIterator implements IntervalIterator { +class CachedIntervalIterator extends FilterIntervalIterator { - final IntervalIterator in; final Scorer scorer; - boolean started = false; + private boolean started = false; CachedIntervalIterator(IntervalIterator in, Scorer scorer) { - this.in = in; + super(in); this.scorer = scorer; } - @Override - public int start() { - return in.start(); - } - - @Override - public int end() { - return in.end(); - } - - @Override - public int innerWidth() { - return in.innerWidth(); - } - @Override public boolean reset(int doc) throws IOException { // inner iterator already reset() in TwoPhaseIterator.matches() diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java index a7df0b4d59a3..9cd7f2ba6df4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java @@ -24,23 +24,17 @@ abstract class DisjunctionIntervalIterator implements IntervalIterator { - private final PriorityQueue queue; - private final IntervalIterator[] subIterators; + protected final PriorityQueue queue; IntervalIterator current; - DisjunctionIntervalIterator(List subIterators) { - this.queue = new PriorityQueue(subIterators.size()) { + DisjunctionIntervalIterator(int iteratorCount) { + this.queue = new PriorityQueue(iteratorCount) { @Override protected boolean lessThan(IntervalIterator a, IntervalIterator b) { return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); } }; - this.subIterators = new IntervalIterator[subIterators.size()]; - - for (int i = 0; i < subIterators.size(); i++) { - this.subIterators[i] = subIterators.get(i); - } } @Override @@ -58,18 +52,12 @@ public int innerWidth() { return current.innerWidth(); } - protected abstract void positionSubIntervals() throws IOException; + protected abstract void fillQueue(int doc) throws IOException; @Override public boolean reset(int doc) throws IOException { - positionSubIntervals(); queue.clear(); - for (IntervalIterator subIterator : subIterators) { - if (subIterator.reset(doc)) { - subIterator.nextInterval(); - queue.add(subIterator); - } - } + fillQueue(doc); current = null; return queue.size() > 0; } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index 9e5ab2813fd6..e0dcf8845e39 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -20,7 +20,10 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; +import java.util.IdentityHashMap; import java.util.List; +import java.util.Map; import org.apache.lucene.util.PriorityQueue; @@ 
-182,18 +185,24 @@ public final float score() throws IOException { @Override public IntervalIterator intervals(String field) { - List subIntervals = new ArrayList<>(); + Map subIntervals = new IdentityHashMap<>(); for (DisiWrapper dw : subScorers) { IntervalIterator subIt = dw.scorer.intervals(field); if (subIt != null) - subIntervals.add(subIt); + subIntervals.put(dw, subIt); } if (subIntervals.size() == 0) return null; - return new DisjunctionIntervalIterator(subIntervals) { + return new DisjunctionIntervalIterator(subIntervals.size()) { @Override - protected void positionSubIntervals() throws IOException { - getSubMatches(); + protected void fillQueue(int doc) throws IOException { + for (DisiWrapper dw = getSubMatches(); dw != null; dw = dw.next) { + IntervalIterator it = subIntervals.get(dw); + if (it.reset(doc)) { + it.nextInterval(); + queue.add(it); + } + } } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java new file mode 100644 index 000000000000..4ca9f6e01aa3 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; + +public abstract class FilterIntervalIterator implements IntervalIterator { + + protected final IntervalIterator in; + + protected FilterIntervalIterator(IntervalIterator in) { + this.in = in; + } + + @Override + public int start() { + return in.start(); + } + + @Override + public int end() { + return in.end(); + } + + @Override + public int innerWidth() { + return in.innerWidth(); + } + + @Override + public boolean reset(int doc) throws IOException { + return in.reset(doc); + } + + @Override + public int nextInterval() throws IOException { + return in.nextInterval(); + } + + @Override + public float score() { + return in.score(); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java index aa9108512190..f4f91b22e0db 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java @@ -63,7 +63,14 @@ private static boolean matchesOrNull(TwoPhaseIterator it) throws IOException { @Override public IntervalIterator intervals(String field) { - return reqScorer.intervals(field); + return new FilterIntervalIterator(reqScorer.intervals(field)) { + @Override + public boolean reset(int doc) throws IOException { + if (doc == ReqExclScorer.this.docID()) + return in.reset(doc); + return false; + } + }; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java index 5a502024d781..3069cb1ec2da 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java @@ -192,10 +192,14 @@ public IntervalIterator intervals(String field) { return 
reqIntervals; if (reqIntervals == null) return optIntervals; - return new DisjunctionIntervalIterator(Arrays.asList(reqIntervals, optIntervals)) { + return new DisjunctionIntervalIterator(2) { @Override - protected void positionSubIntervals() throws IOException { + protected void fillQueue(int doc) throws IOException { + reqIntervals.reset(doc); + queue.add(reqIntervals); positionOptionalScorers(); + if (optIntervals.reset(doc)) + queue.add(optIntervals); } }; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index d86ac68c774e..ce59c804653a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -42,9 +42,9 @@ public class TestIntervals extends LuceneTestCase { private static String field1_docs[] = { "Nothing of interest to anyone here", "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. Some like it hot, some like it cold, some like it in the pot nine days old", - "Pease porridge cold, pease porridge hot, pease porridge in the pot nine days old. Some like it cold, some like it hot, some like it in the pot nine days old", + "Pease porridge cold, pease porridge hot, pease porridge in the pot twelve days old. Some like it cold, some like it hot, some like it in the pot", "Nor here, nowt hot going on in pease this one", - "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. Some like it hot, some like it cold", + "Pease porridge hot, pease porridge cold, pease porridge in the pot nine years old. 
Some like it hot, some like it twelve", "Porridge is great" }; @@ -99,7 +99,7 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i if (intervals.reset(doc)) { int i = 0, pos; while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { - //System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); + System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); assertEquals(expected[id][i + 1], intervals.end()); @@ -198,8 +198,8 @@ public void testIntervalDisjunction() throws IOException { public void testNesting() throws IOException { checkIntervals(Intervals.unorderedQuery("field1", 100, new TermQuery(new Term("field1", "pease")), - new TermQuery(new Term("field1", "porridge")), - new BooleanQuery.Builder() + new TermQuery(new Term("field1", "porridge")), + new BooleanQuery.Builder() .add(new TermQuery(new Term("field1", "hot")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("field1", "cold")), BooleanClause.Occur.SHOULD) .build()), "field1", 3, new int[][]{ @@ -211,4 +211,27 @@ public void testNesting() throws IOException { {} }); } + + // x near ((a not b) or (c not d)) + public void testBooleans() throws IOException { + checkIntervals(Intervals.unorderedQuery("field1", + new TermQuery(new Term("field1", "pease")), + new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "nine")), BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("field1", "years")), BooleanClause.Occur.MUST_NOT) + .build(), BooleanClause.Occur.SHOULD) + .add(new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "twelve")), BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("field1", "days")), BooleanClause.Occur.MUST_NOT) + .build(), BooleanClause.Occur.SHOULD) + .build()), "field1", 2, new int[][]{ + {}, + { 6, 11 }, + {}, + {}, + { 6, 21 }, + {} + }); + 
} } From 14832e93b88e75cb077a8f583dd851b4684ebcc6 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 28 Feb 2018 19:30:02 +0000 Subject: [PATCH 16/83] ScoreMode.canUseCache() -> ScoreMode.needsPositions() --- .../src/java/org/apache/lucene/search/BooleanQuery.java | 2 +- .../src/java/org/apache/lucene/search/IndexSearcher.java | 2 +- .../core/src/java/org/apache/lucene/search/ScoreMode.java | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index 9d89fbed2d86..ee89ca11de28 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -201,7 +201,7 @@ private BooleanQuery rewriteNoScoring() { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { BooleanQuery query = this; - if (scoreMode.needsScores() == false && scoreMode.useQueryCache()) { + if (scoreMode.needsScores() == false && scoreMode.needsPositions() == false) { query = rewriteNoScoring(); } return new BooleanWeight(query, searcher, scoreMode, boost); diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index c23d3da347d8..5ab5adec1a84 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -697,7 +697,7 @@ public Weight createNormalizedWeight(Query query, ScoreMode scoreMode) throws IO public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException { final QueryCache queryCache = this.queryCache; Weight weight = query.createWeight(this, scoreMode, boost); - if (scoreMode.useQueryCache() && queryCache != null) { + if (scoreMode.needsPositions() == false && queryCache != null) { weight = 
queryCache.doCache(weight, queryCachingPolicy); } return weight; diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java index 2c014efc31d8..ea3ed9f3648b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java @@ -61,8 +61,8 @@ public boolean needsScores() { } @Override - public boolean useQueryCache() { - return false; + public boolean needsPositions() { + return true; } @Override @@ -94,8 +94,8 @@ public int minRequiredPostings() { public abstract int minRequiredPostings(); - public boolean useQueryCache() { - return !needsScores(); + public boolean needsPositions() { + return needsScores(); } } From dfd6fd723176c112970e7fc310b6a2383d4f0011 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 28 Feb 2018 20:12:24 +0000 Subject: [PATCH 17/83] Add intervals to ConjunctionScorer --- .../lucene/search/ConjunctionScorer.java | 21 +++++++++++++++- .../apache/lucene/search/TestIntervals.java | 24 ++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java index f820cd0aaa8e..93e03f3c4177 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.List; /** Scorer for conjunctions, sets of queries, all of which are required. 
*/ class ConjunctionScorer extends Scorer { @@ -103,7 +104,25 @@ public Collection getChildren() { @Override public IntervalIterator intervals(String field) { - return null; // nocommit + List subIntervals = new ArrayList<>(); + for (Scorer scorer : required) { + IntervalIterator it = scorer.intervals(field); + if (it != null) { + subIntervals.add(it); + } + } + if (subIntervals.size() == 0) { + return null; + } + return new DisjunctionIntervalIterator(subIntervals.size()) { + @Override + protected void fillQueue(int doc) throws IOException { + for (IntervalIterator it : subIntervals) { + it.reset(doc); + queue.add(it); + } + } + }; } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index ce59c804653a..2c2ebb48093d 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -213,7 +213,7 @@ public void testNesting() throws IOException { } // x near ((a not b) or (c not d)) - public void testBooleans() throws IOException { + public void testExclusionBooleans() throws IOException { checkIntervals(Intervals.unorderedQuery("field1", new TermQuery(new Term("field1", "pease")), new BooleanQuery.Builder() @@ -234,4 +234,26 @@ public void testBooleans() throws IOException { {} }); } + + public void testConjunctionBooleans() throws IOException { + checkIntervals(Intervals.unorderedQuery("field1", + new TermQuery(new Term("field1", "pease")), + new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "nine")), BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("field2", "caverns")), BooleanClause.Occur.MUST) + .build(), BooleanClause.Occur.SHOULD) + .add(new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "twelve")), BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("field2", "sunless")), BooleanClause.Occur.MUST) + 
.build(), BooleanClause.Occur.SHOULD) + .build()), "field1", 2, new int[][]{ + {}, + { 6, 11 }, + { 6, 11 }, + {}, + {}, + {} + }); + } } From 87a4f254d7803d6df21db66f253064bddd2a0f30 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 28 Feb 2018 22:17:26 +0000 Subject: [PATCH 18/83] Minimum-should-match --- .../lucene/search/DisjunctionScorer.java | 7 +--- .../lucene/search/IntervalFunction.java | 16 +++++---- .../search/MinShouldMatchSumScorer.java | 33 ++++++++++++++++--- .../apache/lucene/search/TestIntervals.java | 21 +++++++++++- 4 files changed, 59 insertions(+), 18 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index e0dcf8845e39..47d37af01f3a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -130,18 +130,13 @@ public boolean matches() throws IOException { // implicitly verified, move it to verifiedMatches w.next = verifiedMatches; verifiedMatches = w; - - if (needsScores == false) { - // we can stop here - return true; - } } else { unverifiedMatches.add(w); } w = next; } - if (verifiedMatches != null) { + if (verifiedMatches != null || needsScores == false) { return true; } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index ef5d55b3b638..9892caa1c400 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -251,12 +251,16 @@ public boolean reset(int doc) throws IOException { this.queueEnd = start = end = innerEnd = innerStart = -1; boolean positioned = true; for (IntervalIterator subIterator : subIterators) { - positioned &= subIterator.reset(doc); - subIterator.nextInterval(); - queue.add(subIterator); - if 
(subIterator.end() > queueEnd) { - queueEnd = subIterator.end(); - innerEnd = subIterator.start(); + if (subIterator.reset(doc)) { + subIterator.nextInterval(); + queue.add(subIterator); + if (subIterator.end() > queueEnd) { + queueEnd = subIterator.end(); + innerEnd = subIterator.start(); + } + } + else { + positioned = false; } } return positioned; diff --git a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java index 98df563a61ba..fad7d9c99989 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java @@ -20,7 +20,9 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.IdentityHashMap; import java.util.List; +import java.util.Map; import java.util.stream.LongStream; import java.util.stream.StreamSupport; @@ -126,11 +128,6 @@ public final Collection getChildren() throws IOException { return matchingChildren; } - @Override - public IntervalIterator intervals(String field) { - return null; // nocommit - } - @Override public DocIdSetIterator iterator() { return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); @@ -330,6 +327,32 @@ public float score() throws IOException { return (float) score; } + @Override + public IntervalIterator intervals(String field) { + Map its = new IdentityHashMap<>(); + for (DisiWrapper s = lead; s != null; s = s.next) { + IntervalIterator it = s.scorer.intervals(field); + if (it != null) { + its.put(s, it); + } + } + if (its.size() == 0) + return null; + return new DisjunctionIntervalIterator(its.size()) { + @Override + protected void fillQueue(int doc) throws IOException { + updateFreq(); + for (DisiWrapper s = lead; s != null; s = s.next) { + IntervalIterator it = its.get(s); + if (it.reset(doc)) { + it.nextInterval(); + queue.add(it); + } + } + } + }; + } + @Override 
public float getMaxScore(int upTo) throws IOException { // TODO: implement but be careful about floating-point errors. diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 2c2ebb48093d..291494e9787f 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -42,7 +42,7 @@ public class TestIntervals extends LuceneTestCase { private static String field1_docs[] = { "Nothing of interest to anyone here", "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. Some like it hot, some like it cold, some like it in the pot nine days old", - "Pease porridge cold, pease porridge hot, pease porridge in the pot twelve days old. Some like it cold, some like it hot, some like it in the pot", + "Pease porridge cold, pease porridge hot, pease porridge in the pot twelve days old. Some like it cold, some like it hot, some like it in the fraggle", "Nor here, nowt hot going on in pease this one", "Pease porridge hot, pease porridge cold, pease porridge in the pot nine years old. 
Some like it hot, some like it twelve", "Porridge is great" @@ -256,4 +256,23 @@ public void testConjunctionBooleans() throws IOException { {} }); } + + public void testMinimumShouldMatch() throws IOException { + checkIntervals(new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD) + .add(new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "porridge")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("field1", "days")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("field1", "fraggle")), BooleanClause.Occur.SHOULD) + .setMinimumNumberShouldMatch(2) + .build(), BooleanClause.Occur.SHOULD) + .build(), "field1", 4, new int[][]{ + {}, + { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 12, 12, 29, 29 }, + { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 12, 12, 27, 27 }, + { 7, 7 }, + { 0, 0, 3, 3, 6, 6 }, + {} + }); + } } From 53fc6b3b058370c8412850525683ca30391958db Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 1 Mar 2018 16:29:27 +0000 Subject: [PATCH 19/83] Javadocs --- .../search/DifferenceIntervalFunction.java | 27 +++- .../search/DifferenceIntervalQuery.java | 2 +- .../search/DisjunctionIntervalIterator.java | 5 +- .../lucene/search/ExactPhraseScorer.java | 10 +- .../apache/lucene/search/IntervalFilter.java | 2 +- .../lucene/search/IntervalFunction.java | 43 +++++- .../lucene/search/IntervalIterator.java | 41 ++++- .../apache/lucene/search/IntervalScorer.java | 4 +- .../org/apache/lucene/search/Intervals.java | 146 +++++++++++++++++- .../java/org/apache/lucene/search/Scorer.java | 12 ++ .../lucene/search/SloppyPhraseScorer.java | 8 +- .../org/apache/lucene/search/TermScorer.java | 2 - .../apache/lucene/search/TestIntervals.java | 2 +- 13 files changed, 271 insertions(+), 33 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index a0a19fa81307..218e493a2316 
100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -20,8 +20,10 @@ import java.io.IOException; import java.util.Objects; -import static org.apache.lucene.search.Intervals.NO_MORE_INTERVALS; - +/** + * A function that takes two interval iterators and combines them to produce a third, + * generally by computing a difference interval between them + */ public abstract class DifferenceIntervalFunction { @Override @@ -33,8 +35,15 @@ public abstract class DifferenceIntervalFunction { @Override public abstract String toString(); + /** + * Combine two interval iterators into a third + */ public abstract IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend); + /** + * Filters the minuend iterator so that only intervals that do not overlap intervals from the + * subtrahend iterator are returned + */ public static final DifferenceIntervalFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { @@ -42,6 +51,10 @@ public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrah } }; + /** + * Filters the minuend iterator so that only intervals that do not contain intervals from the + * subtrahend iterator are returned + */ public static final DifferenceIntervalFunction NOT_CONTAINING = new SingletonFunction("NOT_CONTAINING") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { @@ -49,6 +62,10 @@ public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrah } }; + /** + * Filters the minuend iterator so that only intervals that are not contained by intervals from + * the subtrahend iterator are returned + */ public static final DifferenceIntervalFunction NOT_CONTAINED_BY = new SingletonFunction("NOT_CONTAINED_BY") { @Override public 
IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { @@ -117,11 +134,15 @@ public int nextInterval() throws IOException { } } + /** + * Filters the minuend iterator so that only intervals that do not occur within a set number + * of positions of intervals from the subtrahend iterator are returned + */ public static class NotWithinFunction extends DifferenceIntervalFunction { private final int positions; - public NotWithinFunction(int positions) { + NotWithinFunction(int positions) { this.positions = positions; } diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java index a971f909295f..3874fe9689b1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java @@ -138,7 +138,7 @@ public boolean matches() throws IOException { if (subtrahendScorer.docID() < approximation.docID()) { subtrahendScorer.iterator().advance(approximation.docID()); } - return intervals.reset(approximation.docID()) && intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; + return intervals.reset(approximation.docID()) && intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java index 9cd7f2ba6df4..269689bc43cd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java @@ -18,7 +18,6 @@ package org.apache.lucene.search; import java.io.IOException; -import java.util.List; import org.apache.lucene.util.PriorityQueue; @@ -71,13 +70,13 @@ public int nextInterval() throws IOException { int start = current.start(), end = current.end(); while (queue.size() 
> 0 && contains(queue.top(), start, end)) { IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != Intervals.NO_MORE_INTERVALS) { + if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { queue.add(it); } } if (queue.size() == 0) { current = IntervalIterator.EMPTY; - return Intervals.NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; } current = queue.top(); return current.start(); diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index 5aec1e64ff62..c9e16cf04a7c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -93,7 +93,7 @@ public boolean matches() throws IOException { } freq = -1; intervals.reset(docID()); - return intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; + return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; } @Override @@ -144,7 +144,7 @@ public IntervalIterator intervals(String field) { private void ensureFreq() throws IOException { if (freq == -1) { freq = 1; - while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS) { + while (intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { freq++; } } @@ -198,7 +198,7 @@ public boolean reset(int doc) throws IOException { public int nextInterval() throws IOException { final PostingsAndPosition lead = postings[0]; if (lead.upTo == lead.freq) - return Intervals.NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; lead.pos = lead.postings.nextPosition(); lead.upTo += 1; @@ -212,14 +212,14 @@ public int nextInterval() throws IOException { // advance up to the same position as the lead if (advancePosition(posting, expectedPos) == false) { - return Intervals.NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; } if (posting.pos != expectedPos) { // we advanced too far if 
(advancePosition(lead, posting.pos - posting.offset + lead.offset)) { continue advanceHead; } else { - return Intervals.NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; } } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index 4f33a85861f4..58f6bda3936c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -55,7 +55,7 @@ public final int nextInterval() throws IOException { do { next = in.nextInterval(); } - while (accept() == false && next != Intervals.NO_MORE_INTERVALS); + while (accept() == false && next != IntervalIterator.NO_MORE_INTERVALS); return next; } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 9892caa1c400..0a47ca1483fe 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -18,14 +18,14 @@ package org.apache.lucene.search; import java.io.IOException; -import java.util.Arrays; import java.util.List; import java.util.Objects; import org.apache.lucene.util.PriorityQueue; -import static org.apache.lucene.search.Intervals.NO_MORE_INTERVALS; - +/** + * Combine a list of {@link IntervalIterator}s into another + */ public abstract class IntervalFunction { @Override @@ -37,8 +37,14 @@ public abstract class IntervalFunction { @Override public abstract String toString(); + /** + * Combine the iterators into another iterator + */ public abstract IntervalIterator apply(List iterators); + /** + * Return an iterator over intervals where the subiterators appear in a given order + */ public static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { @Override public IntervalIterator apply(List intervalIterators) { @@ -46,8 +52,17 @@ 
public IntervalIterator apply(List intervalIterators) { } }; + /** + * Return an iterator over intervals where the subiterators appear in a given order, + * filtered by width + */ public static class OrderedNearFunction extends IntervalFunction { + /** + * Create a new OrderedNearFunction + * @param minWidth the minimum width of returned intervals + * @param maxWidth the maximum width of returned intervals + */ public OrderedNearFunction(int minWidth, int maxWidth) { this.minWidth = minWidth; this.maxWidth = maxWidth; @@ -81,7 +96,7 @@ public int hashCode() { } } - public static IntervalIterator orderedIntervalIterator(List subIterators) { + private static IntervalIterator orderedIntervalIterator(List subIterators) { for (IntervalIterator it : subIterators) { if (it == IntervalIterator.EMPTY) return IntervalIterator.EMPTY; @@ -157,7 +172,9 @@ public int nextInterval() throws IOException { } } - + /** + * Return an iterator over intervals where the subiterators appear in any order + */ public static final IntervalFunction UNORDERED = new SingletonFunction("UNORDERED") { @Override public IntervalIterator apply(List intervalIterators) { @@ -165,11 +182,19 @@ public IntervalIterator apply(List intervalIterators) { } }; + /** + * An iterator over intervals where the subiterators appear in any order, within a given width range + */ public static class UnorderedNearFunction extends IntervalFunction { final int minWidth; final int maxWidth; + /** + * Create a new UnorderedNearFunction + * @param minWidth the minimum width of the returned intervals + * @param maxWidth the maximum width of the returned intervals + */ public UnorderedNearFunction(int minWidth, int maxWidth) { this.minWidth = minWidth; this.maxWidth = maxWidth; @@ -201,7 +226,7 @@ public int hashCode() { } } - public static IntervalIterator unorderedIntervalIterator(List subIntervals) { + private static IntervalIterator unorderedIntervalIterator(List subIntervals) { for (IntervalIterator it : subIntervals) { 
if (it == IntervalIterator.EMPTY) return IntervalIterator.EMPTY; @@ -302,6 +327,9 @@ public int nextInterval() throws IOException { } + /** + * Return an iterator over intervals where the first iterator contains intervals from the second + */ public static final IntervalFunction CONTAINING = new SingletonFunction("CONTAINING") { @Override public IntervalIterator apply(List iterators) { @@ -352,6 +380,9 @@ public int nextInterval() throws IOException { } }; + /** + * Return an iterator over intervals where the first iterator is contained by intervals from the second + */ public static final IntervalFunction CONTAINED_BY = new SingletonFunction("CONTAINED_BY") { @Override public IntervalIterator apply(List iterators) { diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index b14211e0eea1..22b6c043ce4f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -19,24 +19,59 @@ import java.io.IOException; -import org.apache.lucene.index.PostingsEnum; - +/** + * Defines methods to iterate over the intervals that a {@link Scorer} matches + * on a document + */ public interface IntervalIterator { + /** + * When returned from {@link #nextInterval()}, indicates that there are no more + * matching intervals on the current document + */ + int NO_MORE_INTERVALS = Integer.MAX_VALUE; + + /** + * The start of the current interval + */ int start(); + /** + * The end of the current interval + */ int end(); + /** + * The width of the current interval + */ int innerWidth(); + /** + * Called to reset the iterator on a new document + * + * @return true if the iterator's parent Scorer is positioned on the given doc id + */ boolean reset(int doc) throws IOException; + /** + * Advance the iterator to the next interval + * + * @return the start of the next interval, or {@link
IntervalIterator#NO_MORE_INTERVALS} if + * there are no more intervals on the current document + */ int nextInterval() throws IOException; + /** + * The score of the current interval + */ default float score() { return (float) (1.0 / (1 + innerWidth())); } + /** + * An empty iterator that always returns {@code false} from {@link #reset(int)} and + * {@link IntervalIterator#NO_MORE_INTERVALS} from {@link #nextInterval()} + */ IntervalIterator EMPTY = new IntervalIterator() { @Override @@ -61,7 +96,7 @@ public boolean reset(int doc) { @Override public int nextInterval() { - return Intervals.NO_MORE_INTERVALS; + return NO_MORE_INTERVALS; } }; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index 2ac445efb4f4..8d15dcf626fa 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -70,7 +70,7 @@ private void ensureFreq() throws IOException { do { freq += intervals.score(); } - while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS); + while (intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS); } } @@ -91,7 +91,7 @@ public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { - return intervals.reset(approximation.docID()) && intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; + return intervals.reset(approximation.docID()) && intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index 43ffb74a9bee..7a4570ae2c73 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -19,56 +19,198 @@ import java.util.Arrays; +/** + * Constructor 
functions for interval-based queries + */ public final class Intervals { - public static final int NO_MORE_INTERVALS = Integer.MAX_VALUE; - + private Intervals() {} + + /** + * Create an ordered query with a maximum width + * + * Matches documents in which the subqueries all match in the given order, and + * in which the width of the interval over which the queries match is less than + * the defined width + * + * @param field the field to query + * @param width the maximum width of subquery-spanning intervals that will match + * @param subQueries an ordered set of subqueries + */ public static Query orderedQuery(String field, int width, Query... subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(0, width)); } + /** + * Create an ordered query with a defined width range + * + * Matches documents in which the subqueries all match in the given order, and in + * which the width of the interval over which the queries match is between the + * minimum and maximum defined widths + * + * @param field the field to query + * @param minWidth the minimum width of subquery-spanning intervals that will match + * @param maxWidth the maximum width of subquery-spanning intervals that will match + * @param subQueries an ordered set of subqueries + */ public static Query orderedQuery(String field, int minWidth, int maxWidth, Query... subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(minWidth, maxWidth)); } + /** + * Create an ordered query with an unbounded width range + * + * Matches documents in which the subqueries all match in the given order + * + * @param field the field to query + * @param subQueries an ordered set of subqueries + */ public static Query orderedQuery(String field, Query... 
subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.ORDERED); } + /** + * Create an unordered query with a maximum width + * + * Matches documents in which the subqueries all match in any order, and in which + * the width of the interval over which the queries match is less than the + * defined width + * + * @param field the field to query + * @param width the maximum width of subquery-spanning intervals that will match + * @param subQueries an unordered set of queries + */ public static Query unorderedQuery(String field, int width, Query... subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(0, width)); } + /** + * Create an unordered query with a defined width range + * + * Matches documents in which the subqueries all match in any order, and in which + * the width of the interval over which the queries match is between the minimum + * and maximum defined widths + * + * @param field the field to query + * @param minWidth the minimum width of subquery-spanning intervals that will match + * @param maxWidth the maximum width of subquery-spanning intervals that will match + * @param subQueries an unordered set of queries + */ public static Query unorderedQuery(String field, int minWidth, int maxWidth, Query... subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(minWidth, maxWidth)); } + /** + * Create an unordered query with an unbounded width range + * + * Matches documents in which all the subqueries match. This is in essence a pure conjunction + * query, but it will expose iterators over those conjunctions that may then be further + * nested in other interval queries + * + * @param field the field to query + * @param subQueries an unordered set of queries + */ public static Query unorderedQuery(String field, Query...
subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.UNORDERED); } + /** + * Create a non-overlapping query + * + * Matches documents that match the minuend query, except when the intervals of the minuend + * query overlap with intervals from the subtrahend query + * + * Exposes matching intervals from the minuend + * + * @param field the field to query + * @param minuend the query to filter + * @param subtrahend the query to filter by + */ public static Query nonOverlappingQuery(String field, Query minuend, Query subtrahend) { return new DifferenceIntervalQuery(field, minuend, subtrahend, DifferenceIntervalFunction.NON_OVERLAPPING); } + /** + * Create a not-within query + * + * Matches documents that match the minuend query, except when the intervals of the minuend + * query appear within a set number of positions of intervals from the subtrahend query + * + * Exposes matching intervals from the minuend + * + * @param field the field to query + * @param minuend the query to filter + * @param positions the maximum distance that intervals from the minuend may occur from intervals + * of the subtrahend + * @param subtrahend the query to filter by + */ public static Query notWithinQuery(String field, Query minuend, int positions, Query subtrahend) { return new DifferenceIntervalQuery(field, minuend, subtrahend, new DifferenceIntervalFunction.NotWithinFunction(positions)); } + /** + * Create a not-containing query + * + * Matches documents that match the minuend query, except when the intervals of the minuend + * query are contained within an interval of the subtrahend query + * + * Exposes matching intervals from the minuend + * + * @param field the field to query + * @param minuend the query to filter + * @param subtrahend the query to filter by + */ public static Query notContainingQuery(String field, Query minuend, Query subtrahend) { return new DifferenceIntervalQuery(field, minuend, subtrahend, 
DifferenceIntervalFunction.NOT_CONTAINING); } + /** + * Create a containing query + * + * Matches documents where intervals of the big query contain one or more intervals from + * the small query + * + * Exposes matching intervals from the big query + * + * @param field the field to query + * @param big the query to filter + * @param small the query to filter by + */ public static Query containingQuery(String field, Query big, Query small) { return new IntervalQuery(field, Arrays.asList(big, small), IntervalFunction.CONTAINING); } + /** + * Create a not-contained-by query + * + * Matches documents that match the small query, except when the intervals of the small + * query are contained within an interval of the big query + * + * Exposes matching intervals from the small query + * + * @param field the field to query + * @param small the query to filter + * @param big the query to filter by + */ public static Query notContainedByQuery(String field, Query small, Query big) { return new DifferenceIntervalQuery(field, small, big, DifferenceIntervalFunction.NOT_CONTAINED_BY); } + /** + * Create a contained-by query + * + * Matches documents where intervals of the small query occur within intervals of the big query + * + * Exposes matching intervals from the small query + * + * @param field the field to query + * @param small the query to filter + * @param big the query to filter by + */ public static Query containedByQuery(String field, Query small, Query big) { return new IntervalQuery(field, Arrays.asList(small, big), IntervalFunction.CONTAINED_BY); } + // TODO: beforeQuery, afterQuery, arbitrary IntervalFunctions + } diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java index 3c05fb194281..9e1d46c8ecfc 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java @@ -125,6 +125,18 @@ public ChildScorer(Scorer child, 
String relationship) { */ public abstract DocIdSetIterator iterator(); + /** + * Return an {@link IntervalIterator} over matching intervals for a given field + * + * Consumers should call {@link IntervalIterator#reset(int)} when the parent + * Scorer's {@link DocIdSetIterator} has moved to a new document, and then + * iterate over the intervals by repeatedly calling {@link IntervalIterator#nextInterval()} + * until {@link IntervalIterator#NO_MORE_INTERVALS} is returned. + * + * @param field The field to retrieve intervals for + * @return an {@link IntervalIterator}, or {@code null} if no intervals are available + * for the given field + */ public abstract IntervalIterator intervals(String field); /** diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index a103af73fa7e..f5374f9a4f69 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -105,7 +105,7 @@ public boolean reset(int doc) throws IOException { @Override public int nextInterval() throws IOException { if (pq.size() < phrasePositions.length) - return Intervals.NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; currentEnd = nextEnd; PhrasePositions pp = pq.pop(); start = pp.realPosition; @@ -143,7 +143,7 @@ public int nextInterval() throws IOException { if (matchLength <= slop) { return start; } - return Intervals.NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; } } @@ -630,7 +630,7 @@ private void ensureFreq() throws IOException { if (sloppyFreq == -1) { numMatches = 1; sloppyFreq = intervals.score(); - while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS) { + while (intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { sloppyFreq += intervals.score(); numMatches++; } @@ -670,7 +670,7 @@ public TwoPhaseIterator twoPhaseIterator() { public boolean
matches() throws IOException { sloppyFreq = -1; intervals.reset(docID()); - return intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; + return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index ef9eeeabc026..79f00d30cb57 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -24,8 +24,6 @@ import org.apache.lucene.index.SlowImpactsEnum; import org.apache.lucene.index.TermsEnum; -import static org.apache.lucene.search.Intervals.NO_MORE_INTERVALS; - /** Expert: A Scorer for documents matching a Term. */ final class TermScorer extends Scorer { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 291494e9787f..1dd032007310 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -98,7 +98,7 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i int id = (int) ids.longValue(); if (intervals.reset(doc)) { int i = 0, pos; - while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { + while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) { System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); From d46307bbdb3dd2b19fb7d552c5cc19bebd5df8bc Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 1 Mar 2018 17:01:13 +0000 Subject: [PATCH 20/83] cleanups --- .../search/BlockMaxConjunctionScorer.java | 2 +- .../apache/lucene/search/BooleanQuery.java | 3 +- .../lucene/search/CachedIntervalIterator.java | 6 ++ .../search/DifferenceIntervalQuery.java | 13 
+++- .../org/apache/lucene/search/DisiWrapper.java | 1 - .../search/DisjunctionIntervalIterator.java | 6 ++ .../lucene/search/ExactPhraseScorer.java | 48 ------------ .../apache/lucene/search/IndexSearcher.java | 3 +- .../apache/lucene/search/IntervalFilter.java | 17 ++++ .../apache/lucene/search/IntervalQuery.java | 16 ++-- .../org/apache/lucene/search/Intervals.java | 5 ++ .../org/apache/lucene/search/ScoreMode.java | 14 ++-- .../lucene/search/SloppyPhraseScorer.java | 77 +++++-------------- .../org/apache/lucene/search/WANDScorer.java | 2 +- .../java/org/apache/lucene/search/Weight.java | 2 +- .../apache/lucene/search/TestIntervals.java | 2 +- 16 files changed, 90 insertions(+), 127 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java index 2c625643ef15..02f4a0f88dda 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java @@ -242,6 +242,6 @@ public Collection getChildren() { @Override public IntervalIterator intervals(String field) { - return null; // nocommit + throw new UnsupportedOperationException(); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index ee89ca11de28..bfe00e2e4bf4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -33,6 +33,7 @@ import java.util.function.Predicate; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.search.BooleanClause.Occur; /** A Query that matches documents matching boolean combinations of other @@ -201,7 +202,7 @@ private BooleanQuery rewriteNoScoring() { @Override public Weight createWeight(IndexSearcher searcher, 
ScoreMode scoreMode, float boost) throws IOException { BooleanQuery query = this; - if (scoreMode.needsScores() == false && scoreMode.needsPositions() == false) { + if (scoreMode.minRequiredPostings() == PostingsEnum.NONE) { query = rewriteNoScoring(); } return new BooleanWeight(query, searcher, scoreMode, boost); diff --git a/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java index 5e5f68a365ea..5501cffae316 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java @@ -19,6 +19,12 @@ import java.io.IOException; +/** + * An interval iterator which caches its first invocation. + * + * Useful for two-phase queries that confirm matches by checking that at least one + * interval exists in a given document + */ class CachedIntervalIterator extends FilterIntervalIterator { final Scorer scorer; diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java index 3874fe9689b1..b175fbac7fc8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java @@ -27,6 +27,10 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.Similarity; +/** + * A query that retrieves documents containing intervals returned from a + * {@link DifferenceIntervalFunction} over a minuend query and a subtrahend query + */ public class DifferenceIntervalQuery extends Query { private final Query minuend; @@ -34,7 +38,14 @@ public class DifferenceIntervalQuery extends Query { private final DifferenceIntervalFunction function; private final String field; - protected DifferenceIntervalQuery(String field, Query minuend, Query subtrahend, DifferenceIntervalFunction 
function) { + /** + * Create a new DifferenceIntervalQuery + * @param field the field to query + * @param minuend the subquery to filter + * @param subtrahend the subquery to filter by + * @param function a {@link DifferenceIntervalFunction} to combine the minuend and subtrahend + */ + public DifferenceIntervalQuery(String field, Query minuend, Query subtrahend, DifferenceIntervalFunction function) { this.minuend = minuend; this.subtrahend = subtrahend; this.function = function; diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index 0a581804aaa3..fac9418010f4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -81,6 +81,5 @@ public DisiWrapper(Spans spans) { this.lastApproxNonMatchDoc = -2; this.lastApproxMatchDoc = -2; } - } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java index 269689bc43cd..ce3ff22b41b2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java @@ -21,6 +21,9 @@ import org.apache.lucene.util.PriorityQueue; +/** + * Implements the minimum-interval OR algorithm + */ abstract class DisjunctionIntervalIterator implements IntervalIterator { protected final PriorityQueue queue; @@ -51,6 +54,9 @@ public int innerWidth() { return current.innerWidth(); } + /** + * Called to repopulate the interval priority queue when moving to a new document + */ protected abstract void fillQueue(int doc) throws IOException; @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index c9e16cf04a7c..c69f175505fc 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -228,52 +228,4 @@ public int nextInterval() throws IOException { } } - private int phraseFreq() throws IOException { - // reset state - final PostingsAndPosition[] postings = this.postings; - for (PostingsAndPosition posting : postings) { - posting.freq = posting.postings.freq(); - posting.pos = posting.postings.nextPosition(); - posting.upTo = 1; - } - - int freq = 0; - final PostingsAndPosition lead = postings[0]; - - advanceHead: - while (true) { - final int phrasePos = lead.pos - lead.offset; - for (int j = 1; j < postings.length; ++j) { - final PostingsAndPosition posting = postings[j]; - final int expectedPos = phrasePos + posting.offset; - - // advance up to the same position as the lead - if (advancePosition(posting, expectedPos) == false) { - break advanceHead; - } - - if (posting.pos != expectedPos) { // we advanced too far - if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) { - continue advanceHead; - } else { - break advanceHead; - } - } - } - - freq += 1; - if (needsScores == false) { - break; - } - - if (lead.upTo == lead.freq) { - break; - } - lead.pos = lead.postings.nextPosition(); - lead.upTo += 1; - } - - return this.freq = freq; - } - } diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index 5ab5adec1a84..9011183f7aca 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -36,6 +36,7 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.StoredFieldVisitor; import 
org.apache.lucene.index.Term; @@ -697,7 +698,7 @@ public Weight createNormalizedWeight(Query query, ScoreMode scoreMode) throws IO public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException { final QueryCache queryCache = this.queryCache; Weight weight = query.createWeight(this, scoreMode, boost); - if (scoreMode.needsPositions() == false && queryCache != null) { + if (scoreMode.minRequiredPostings() == PostingsEnum.NONE && queryCache != null) { weight = queryCache.doCache(weight, queryCachingPolicy); } return weight; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index 58f6bda3936c..deb842cb1a49 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -19,8 +19,15 @@ import java.io.IOException; +/** + * Wraps an {@link IntervalIterator} and passes through those intervals that match the {@link #accept()} function + */ public abstract class IntervalFilter implements IntervalIterator { + /** + * Filter an {@link IntervalIterator} by its outer width, ie the distance between the + * start and end of the iterator + */ public static IntervalIterator widthFilter(IntervalIterator in, int minWidth, int maxWidth) { return new IntervalFilter(in) { @Override @@ -31,6 +38,10 @@ protected boolean accept() { }; } + /** + * Filter an {@link IntervalIterator} by its inner width, ie the distance between the + * end of its first subiterator and the beginning of its last + */ public static IntervalIterator innerWidthFilter(IntervalIterator in, int minWidth, int maxWidth) { return new IntervalFilter(in) { @Override @@ -43,10 +54,16 @@ protected boolean accept() { private final IntervalIterator in; + /** + * Create a new filter + */ public IntervalFilter(IntervalIterator in) { this.in = in; } + /** + * @return {@code true} if the wrapped iterator's interval 
should be passed on + */ protected abstract boolean accept(); @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index dfab7da1e3c7..3c2683cb6046 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -31,17 +31,23 @@ import org.apache.lucene.index.TermStates; import org.apache.lucene.search.similarities.Similarity; +/** + * A query that retrieves documents containing intervals returned from an + * {@link IntervalFunction} over a set of subqueries + */ public final class IntervalQuery extends Query { private final String field; private final List subQueries; private final IntervalFunction iteratorFunction; - protected IntervalQuery(String field, List subQueries, IntervalFunction iteratorFunction) { - this(field, subQueries, null, iteratorFunction); - } - - protected IntervalQuery(String field, List subQueries, Query subtrahend, IntervalFunction iteratorFunction) { + /** + * Create a new IntervalQuery + * @param field the field to query + * @param subQueries the subqueries to generate intervals from + * @param iteratorFunction an {@link IntervalFunction} to combine the intervals from the subqueries + */ + public IntervalQuery(String field, List subQueries, IntervalFunction iteratorFunction) { this.field = field; this.subQueries = subQueries; this.iteratorFunction = iteratorFunction; diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index 7a4570ae2c73..bc0455289240 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -21,6 +21,11 @@ /** * Constructor functions for interval-based queries + * + * These queries use {@link IntervalFunction} or {@link DifferenceIntervalFunction} + * classes, 
implementing minimum-interval algorithms taken from the paper + * + * Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics */ public final class Intervals { diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java index ea3ed9f3648b..815286adddc9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java @@ -54,17 +54,15 @@ public int minRequiredPostings() { } }, + /** + * Produced scorers will allow visiting all matches, and expose positions + */ COMPLETE_POSITIONS { @Override public boolean needsScores() { return false; } - @Override - public boolean needsPositions() { - return true; - } - @Override public int minRequiredPostings() { return PostingsEnum.POSITIONS; @@ -92,10 +90,10 @@ public int minRequiredPostings() { */ public abstract boolean needsScores(); + /** + * The minimum flags to be passed to {@link org.apache.lucene.index.TermsEnum#postings(PostingsEnum, int)} + */ public abstract int minRequiredPostings(); - public boolean needsPositions() { - return needsScores(); - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index f5374f9a4f69..de9546be3938 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -147,63 +147,6 @@ public int nextInterval() throws IOException { } } - /** - * Score a candidate doc for all slop-valid position-combinations (matches) - * encountered while traversing/hopping the PhrasePositions. - *
    The score contribution of a match depends on the distance: - *
    - highest score for distance=0 (exact match). - *
    - score gets lower as distance gets higher. - *
    Example: for query "a b"~2, a document "x a b a y" can be scored twice: - * once for "a b" (distance=0), and once for "b a" (distance=2). - *
    Possibly not all valid combinations are encountered, because for efficiency - * we always propagate the least PhrasePosition. This allows to base on - * PriorityQueue and move forward faster. - * As result, for example, document "a b c b a" - * would score differently for queries "a b c"~4 and "c b a"~4, although - * they really are equivalent. - * Similarly, for doc "a b c b a f g", query "c b"~2 - * would get same score as "g f"~2, although "c b"~2 could be matched twice. - * We may want to fix this in the future (currently not, for performance reasons). - */ - private float phraseFreq() throws IOException { - if (!initPhrasePositions()) { - return 0.0f; - } - float freq = 0.0f; - numMatches = 0; - PhrasePositions pp = pq.pop(); - int matchLength = end - pp.position; - int next = pq.top().position; - while (advancePP(pp)) { - if (hasRpts && !advanceRpts(pp)) { - break; // pps exhausted - } - if (pp.position > next) { // done minimizing current match-length - if (matchLength <= slop) { - freq += (1.0 / (1.0 + matchLength)); // score match - numMatches++; - if (!needsScores) { - return freq; - } - } - pq.add(pp); - pp = pq.pop(); - next = pq.top().position; - matchLength = end - pp.position; - } else { - int matchLength2 = end - pp.position; - if (matchLength2 < matchLength) { - matchLength = matchLength2; - } - } - } - if (matchLength <= slop) { - freq += (1.0 / (1.0 + matchLength)); // score match - numMatches++; - } - return freq; - } - /** advance a PhrasePosition and update 'end', return false if exhausted */ private boolean advancePP(PhrasePositions pp) throws IOException { if (!pp.nextPosition()) { @@ -625,7 +568,25 @@ float sloppyFreq() throws IOException { // } // } // } - + + /** + * Score a candidate doc for all slop-valid position-combinations (matches) + * encountered while traversing/hopping the PhrasePositions. + *
    The score contribution of a match depends on the distance: + *
    - highest score for distance=0 (exact match). + *
    - score gets lower as distance gets higher. + *
    Example: for query "a b"~2, a document "x a b a y" can be scored twice: + * once for "a b" (distance=0), and once for "b a" (distance=2). + *
    Possibly not all valid combinations are encountered, because for efficiency + * we always propagate the least PhrasePosition. This allows to base on + * PriorityQueue and move forward faster. + * As result, for example, document "a b c b a" + * would score differently for queries "a b c"~4 and "c b a"~4, although + * they really are equivalent. + * Similarly, for doc "a b c b a f g", query "c b"~2 + * would get same score as "g f"~2, although "c b"~2 could be matched twice. + * We may want to fix this in the future (currently not, for performance reasons). + */ private void ensureFreq() throws IOException { if (sloppyFreq == -1) { numMatches = 1; diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java index f39380b5cd88..f4ef706fcf1d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java @@ -442,7 +442,7 @@ private void advanceAllTail() throws IOException { @Override public IntervalIterator intervals(String field) { - return null; // nocommit + throw new UnsupportedOperationException(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 244056313f78..3106e1962a59 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -44,7 +44,7 @@ * A Weight is used in the following way: *

      *
    1. A Weight is constructed by a top-level query, given a - * IndexSearcher ({@link Query#createWeight(IndexSearcher, ScoreMode, org.apache.lucene.search.Query.Postings, float)}). + * IndexSearcher ({@link Query#createWeight(IndexSearcher, ScoreMode, float)}). *
    2. A Scorer is constructed by * {@link #scorer(org.apache.lucene.index.LeafReaderContext)}. *
    diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 1dd032007310..7711f13509c8 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -99,7 +99,7 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i if (intervals.reset(doc)) { int i = 0, pos; while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) { - System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); + //System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); assertEquals(expected[id][i + 1], intervals.end()); From 96d6ba70b7da91632d2209e0a5f7769bc9f96731 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 1 Mar 2018 17:12:02 +0000 Subject: [PATCH 21/83] Expose intervals from SpanScorer --- .../lucene/search/spans/SpanScorer.java | 36 +++++++++++++++++-- .../lucene/search/spans/SpanWeight.java | 2 +- .../apache/lucene/search/TestIntervals.java | 19 +++++++++- .../queries/payloads/PayloadScoreQuery.java | 6 ++-- .../payloads/SpanPayloadCheckQuery.java | 2 +- 5 files changed, 57 insertions(+), 8 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java index 5fe3fd8dbbf8..2ec0c5d2b372 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -32,6 +32,7 @@ */ public class SpanScorer extends Scorer { + protected final String field; protected final Spans spans; protected final LeafSimScorer docScorer; @@ -42,10 +43,11 @@ public class SpanScorer extends Scorer { private int lastScoredDoc = -1; // last doc we called 
setFreqCurrentDoc() for /** Sole constructor. */ - public SpanScorer(SpanWeight weight, Spans spans, LeafSimScorer docScorer) { + public SpanScorer(SpanWeight weight, String field, Spans spans, LeafSimScorer docScorer) { super(weight); this.spans = Objects.requireNonNull(spans); this.docScorer = docScorer; + this.field = field; } /** return the Spans for this Scorer **/ @@ -60,7 +62,9 @@ public int docID() { @Override public IntervalIterator intervals(String field) { - return null; // nocommit + if (this.field.equals(field)) + return new SpanIntervalIterator(); + return null; } @Override @@ -152,4 +156,32 @@ final float sloppyFreq() throws IOException { return freq; } + private class SpanIntervalIterator implements IntervalIterator { + + @Override + public int start() { + return spans.startPosition(); + } + + @Override + public int end() { + return spans.endPosition() - 1; + } + + @Override + public int innerWidth() { + return spans.width(); + } + + @Override + public boolean reset(int doc) throws IOException { + return spans.docID() == doc; + } + + @Override + public int nextInterval() throws IOException { + return spans.nextStartPosition(); + } + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index 25b58fdc39a0..f19ca742076f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -130,7 +130,7 @@ public SpanScorer scorer(LeafReaderContext context) throws IOException { return null; } final LeafSimScorer docScorer = getSimScorer(context); - return new SpanScorer(this, spans, docScorer); + return new SpanScorer(this, field, spans, docScorer); } /** diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 7711f13509c8..339b4f657f62 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -31,6 +31,9 @@ import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; @@ -99,7 +102,7 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i if (intervals.reset(doc)) { int i = 0, pos; while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) { - //System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); + System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); assertEquals(expected[id][i + 1], intervals.end()); @@ -275,4 +278,18 @@ public void testMinimumShouldMatch() throws IOException { {} }); } + + public void testSpanNearQueryEquivalence() throws IOException { + checkIntervals(new SpanNearQuery(new SpanQuery[]{ + new SpanTermQuery(new Term("field1", "pease")), + new SpanTermQuery(new Term("field1", "hot"))}, 100, true), + "field1", 3, new int[][]{ + {}, + {0, 2, 3, 17, 6, 17}, + {0, 5, 3, 5, 6, 21}, + {}, + { 0, 2, 3, 17, 6, 17 }, + { } + }); + } } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java index bd5d927c6275..5b5d5812559b 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java @@ -151,7 +151,7 @@ public SpanScorer 
scorer(LeafReaderContext context) throws IOException { return null; LeafSimScorer docScorer = innerWeight.getSimScorer(context); PayloadSpans payloadSpans = new PayloadSpans(spans, decoder); - return new PayloadSpanScorer(this, payloadSpans, docScorer); + return new PayloadSpanScorer(this, field, payloadSpans, docScorer); } @Override @@ -227,8 +227,8 @@ private class PayloadSpanScorer extends SpanScorer { private final PayloadSpans spans; - private PayloadSpanScorer(SpanWeight weight, PayloadSpans spans, LeafSimScorer docScorer) throws IOException { - super(weight, spans, docScorer); + private PayloadSpanScorer(SpanWeight weight, String field, PayloadSpans spans, LeafSimScorer docScorer) throws IOException { + super(weight, field, spans, docScorer); this.spans = spans; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java index a9d3bfb2da9a..dbee623c59b6 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java @@ -128,7 +128,7 @@ public SpanScorer scorer(LeafReaderContext context) throws IOException { return null; } final LeafSimScorer docScorer = getSimScorer(context); - return new SpanScorer(this, spans, docScorer); + return new SpanScorer(this, field, spans, docScorer); } @Override From c411c670ba43d5dbeccc86f2fb5bcd9a28b5d896 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 6 Mar 2018 01:50:23 +0000 Subject: [PATCH 22/83] IntervalsSource --- .../java/org/apache/lucene/index/Sorter.java | 25 +++ .../search/BlockMaxConjunctionScorer.java | 4 - .../apache/lucene/search/BooleanQuery.java | 3 +- .../lucene/search/CachingCollector.java | 5 - .../search/ConjunctionIntervalIterator.java | 66 ++++++ .../search/ConjunctionIntervalsSource.java | 77 +++++++ .../lucene/search/ConjunctionScorer.java | 30 
+-- .../lucene/search/ConstantScoreScorer.java | 5 - .../search/DifferenceIntervalFunction.java | 27 ++- .../search/DifferenceIntervalQuery.java | 169 --------------- .../search/DifferenceIntervalsSource.java | 74 +++++++ .../org/apache/lucene/search/DisiWrapper.java | 18 ++ .../search/DisjunctionIntervalIterator.java | 95 -------- .../search/DisjunctionIntervalsSource.java | 175 +++++++++++++++ .../lucene/search/DisjunctionScorer.java | 34 +-- .../lucene/search/ExactPhraseScorer.java | 118 ++++------ .../org/apache/lucene/search/FakeScorer.java | 5 - .../lucene/search/FilterIntervalIterator.java | 14 +- .../apache/lucene/search/FilterScorer.java | 5 - .../apache/lucene/search/IndexSearcher.java | 3 +- .../apache/lucene/search/IntervalFilter.java | 25 +-- .../lucene/search/IntervalFunction.java | 77 +++---- .../lucene/search/IntervalIterator.java | 43 +--- .../apache/lucene/search/IntervalQuery.java | 63 ++---- .../apache/lucene/search/IntervalScorer.java | 22 +- .../org/apache/lucene/search/Intervals.java | 202 +++++++++--------- ...rvalIterator.java => IntervalsSource.java} | 36 +--- .../search/MinShouldMatchSumScorer.java | 28 --- .../lucene/search/MultiPhraseQuery.java | 8 +- .../apache/lucene/search/PhrasePositions.java | 6 +- .../org/apache/lucene/search/PhraseQuery.java | 8 +- .../apache/lucene/search/ReqExclScorer.java | 12 -- .../apache/lucene/search/ReqOptSumScorer.java | 35 +-- .../org/apache/lucene/search/ScoreMode.java | 39 ---- .../java/org/apache/lucene/search/Scorer.java | 15 -- .../lucene/search/SloppyPhraseScorer.java | 176 +++++---------- .../apache/lucene/search/SynonymQuery.java | 3 +- .../lucene/search/TermIntervalsSource.java | 127 +++++++++++ .../org/apache/lucene/search/TermQuery.java | 4 +- .../org/apache/lucene/search/TermScorer.java | 69 +----- .../org/apache/lucene/search/WANDScorer.java | 5 - .../java/org/apache/lucene/search/Weight.java | 1 - .../lucene/search/spans/SpanScorer.java | 40 +--- .../lucene/search/spans/SpanWeight.java | 2 
+- .../lucene/search/JustCompileSearch.java | 5 - .../search/TestBoolean2ScorerSupplier.java | 5 - .../lucene/search/TestCachingCollector.java | 5 - .../lucene/search/TestConjunctionDISI.java | 5 - .../lucene/search/TestIntervalQuery.java | 80 +++---- .../apache/lucene/search/TestIntervals.java | 148 ++----------- .../lucene/search/TestMinShouldMatch2.java | 5 - .../TestPositiveScoresOnlyCollector.java | 5 - .../lucene/search/TestQueryRescorer.java | 5 - .../TestScoreCachingWrappingScorer.java | 5 - .../lucene/search/TestTopDocsCollector.java | 5 - .../lucene/search/TestTopFieldCollector.java | 7 +- .../queries/payloads/PayloadScoreQuery.java | 6 +- .../payloads/SpanPayloadCheckQuery.java | 2 +- .../apache/lucene/search/AssertingScorer.java | 5 - .../lucene/search/BlockScoreQueryWrapper.java | 4 - .../search/BulkScorerWrapperScorer.java | 5 - .../search/RandomApproximationQuery.java | 5 - 62 files changed, 939 insertions(+), 1366 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalsSource.java delete mode 100644 lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalsSource.java delete mode 100644 lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java rename lucene/core/src/java/org/apache/lucene/search/{CachedIntervalIterator.java => IntervalsSource.java} (54%) create mode 100644 lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java diff --git a/lucene/core/src/java/org/apache/lucene/index/Sorter.java b/lucene/core/src/java/org/apache/lucene/index/Sorter.java index a081ea7aaf3b..c47f9a118abb 100644 --- a/lucene/core/src/java/org/apache/lucene/index/Sorter.java +++ 
b/lucene/core/src/java/org/apache/lucene/index/Sorter.java @@ -445,5 +445,30 @@ public String getID() { public String toString() { return getID(); } + + static final Scorer FAKESCORER = new Scorer(null) { + + float score; + int doc = -1; + + @Override + public int docID() { + return doc; + } + + public DocIdSetIterator iterator() { + throw new UnsupportedOperationException(); + } + + @Override + public float score() throws IOException { + return score; + } + + @Override + public float getMaxScore(int upTo) throws IOException { + return Float.POSITIVE_INFINITY; + } + }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java index 02f4a0f88dda..070b6c40f025 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java @@ -240,8 +240,4 @@ public Collection getChildren() { return children; } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index bfe00e2e4bf4..f52df9fb9cd8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -33,7 +33,6 @@ import java.util.function.Predicate; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.search.BooleanClause.Occur; /** A Query that matches documents matching boolean combinations of other @@ -202,7 +201,7 @@ private BooleanQuery rewriteNoScoring() { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { BooleanQuery query = this; - if (scoreMode.minRequiredPostings() == 
PostingsEnum.NONE) { + if (scoreMode.needsScores() == false) { query = rewriteNoScoring(); } return new BooleanWeight(query, searcher, scoreMode, boost); diff --git a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java index ae705455ecfa..3bed88dd9980 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java @@ -64,11 +64,6 @@ public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } - @Override public final float score() { return score; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java new file mode 100644 index 000000000000..53139fe42ec8 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +abstract class ConjunctionIntervalIterator implements IntervalIterator { + + protected final List subIterators; + + final DocIdSetIterator approximation; + final float cost; + + ConjunctionIntervalIterator(List subIterators) { + this.subIterators = subIterators; + float costsum = 0; + List approximations = new ArrayList<>(); + for (IntervalIterator it : subIterators) { + costsum += it.cost(); + approximations.add(it.approximation()); + } + this.cost = costsum; + this.approximation = ConjunctionDISI.intersectIterators(approximations); + + } + + @Override + public final DocIdSetIterator approximation() { + return approximation; + } + + @Override + public final boolean advanceTo(int doc) throws IOException { + for (IntervalIterator it : subIterators) { + if (it.advanceTo(doc) == false) + return false; + } + reset(); + return true; + } + + protected abstract void reset() throws IOException; + + @Override + public final float cost() { + return cost; + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalsSource.java new file mode 100644 index 000000000000..2ee0422786c9 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalsSource.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; + +class ConjunctionIntervalsSource extends IntervalsSource { + + final List subSources; + final IntervalFunction function; + + ConjunctionIntervalsSource(List subSources, IntervalFunction function) { + this.subSources = subSources; + this.function = function; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ConjunctionIntervalsSource that = (ConjunctionIntervalsSource) o; + return Objects.equals(subSources, that.subSources) && + Objects.equals(function, that.function); + } + + @Override + public String toString() { + return function + subSources.stream().map(Object::toString).collect(Collectors.joining(",", "(", ")")); + } + + @Override + public void extractTerms(String field, Set terms) { + for (IntervalsSource source : subSources) { + source.extractTerms(field, terms); + } + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + List subIntervals = new ArrayList<>(); + for (IntervalsSource source : subSources) { + IntervalIterator it = source.intervals(field, ctx); + if (it == null) + return null; + subIntervals.add(it); + } + return function.apply(subIntervals); + } + + @Override + public int 
hashCode() { + return Objects.hash(subSources, function); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java index 93e03f3c4177..7a1b9563721b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; -import java.util.List; /** Scorer for conjunctions, sets of queries, all of which are required. */ class ConjunctionScorer extends Scorer { @@ -102,27 +101,14 @@ public Collection getChildren() { return children; } - @Override - public IntervalIterator intervals(String field) { - List subIntervals = new ArrayList<>(); - for (Scorer scorer : required) { - IntervalIterator it = scorer.intervals(field); - if (it != null) { - subIntervals.add(it); - } - } - if (subIntervals.size() == 0) { - return null; + static final class DocsAndFreqs { + final long cost; + final DocIdSetIterator iterator; + int doc = -1; + + DocsAndFreqs(DocIdSetIterator iterator) { + this.iterator = iterator; + this.cost = iterator.cost(); } - return new DisjunctionIntervalIterator(subIntervals.size()) { - @Override - protected void fillQueue(int doc) throws IOException { - for (IntervalIterator it : subIntervals) { - it.reset(doc); - queue.add(it); - } - } - }; } - } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java index 0040374b6147..45a6bdbad041 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java @@ -63,11 +63,6 @@ public DocIdSetIterator iterator() { return disi; } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } - 
@Override public TwoPhaseIterator twoPhaseIterator() { return twoPhaseIterator; diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index 218e493a2316..2ee7d36d78ac 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -101,13 +101,22 @@ public int innerWidth() { } @Override - public boolean reset(int doc) throws IOException { - bpos = b.reset(doc); + public boolean advanceTo(int doc) throws IOException { + bpos = b.advanceTo(doc); if (bpos) bpos = b.nextInterval() != NO_MORE_INTERVALS; - return a.reset(doc); + return a.advanceTo(doc); } + @Override + public DocIdSetIterator approximation() { + return a.approximation(); + } + + @Override + public float cost() { + return a.cost() + b.cost(); + } } private static class NonOverlappingIterator extends RelativeIterator { @@ -166,7 +175,7 @@ public int hashCode() { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - IntervalIterator notWithin = new IntervalIterator() { + IntervalIterator notWithin = new FilterIntervalIterator(subtrahend) { @Override public int start() { int start = subtrahend.start(); @@ -186,16 +195,6 @@ public int end() { public int innerWidth() { throw new UnsupportedOperationException(); } - - @Override - public boolean reset(int doc) throws IOException { - return subtrahend.reset(doc); - } - - @Override - public int nextInterval() throws IOException { - return subtrahend.nextInterval(); - } }; return NON_OVERLAPPING.apply(minuend, notWithin); } diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java deleted file mode 100644 index b175fbac7fc8..000000000000 --- 
a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.search; - -import java.io.IOException; -import java.util.Collections; -import java.util.Objects; -import java.util.Set; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.similarities.Similarity; - -/** - * A query that retrieves documents containing intervals returned from a - * {@link DifferenceIntervalFunction} over a minuend query and a subtrahend query - */ -public class DifferenceIntervalQuery extends Query { - - private final Query minuend; - private final Query subtrahend; - private final DifferenceIntervalFunction function; - private final String field; - - /** - * Create a new DifferenceIntervalQuery - * @param field the field to query - * @param minuend the subquery to filter - * @param subtrahend the subquery to filter by - * @param function a {@link DifferenceIntervalFunction} to combine the minuend and subtrahend - */ - public DifferenceIntervalQuery(String field, Query minuend, Query subtrahend, 
DifferenceIntervalFunction function) { - this.minuend = minuend; - this.subtrahend = subtrahend; - this.function = function; - this.field = field; - } - - @Override - public String toString(String field) { - return function + "(" + minuend + ", " + subtrahend + ")"; - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - Weight minuendWeight = searcher.createWeight(minuend, ScoreMode.COMPLETE_POSITIONS, 1); - Weight subtrahendWeight = searcher.createWeight(subtrahend, ScoreMode.COMPLETE_POSITIONS, 1); - return new IntervalDifferenceWeight(minuendWeight, subtrahendWeight, scoreMode, - searcher.getSimilarity(), IntervalQuery.buildSimScorer(field, searcher, Collections.singletonList(minuendWeight), boost)); - } - - @Override - public Query rewrite(IndexReader reader) throws IOException { - Query rewrittenMinuend = minuend.rewrite(reader); - Query rewrittenSubtrahend = subtrahend.rewrite(reader); - if (rewrittenMinuend != minuend || rewrittenSubtrahend != subtrahend) { - return new DifferenceIntervalQuery(field, rewrittenMinuend, rewrittenSubtrahend, function); - } - return this; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - DifferenceIntervalQuery that = (DifferenceIntervalQuery) o; - return Objects.equals(minuend, that.minuend) && - Objects.equals(subtrahend, that.subtrahend) && - Objects.equals(function, that.function); - } - - @Override - public int hashCode() { - return Objects.hash(minuend, subtrahend, function); - } - - private class IntervalDifferenceWeight extends Weight { - - final Weight minuendWeight; - final Weight subtrahendWeight; - final ScoreMode scoreMode; - final Similarity similarity; - final Similarity.SimScorer simScorer; - - private IntervalDifferenceWeight(Weight minuendWeight, Weight subtrahendWeight, ScoreMode scoreMode, - Similarity similarity, Similarity.SimScorer 
simScorer) { - super(DifferenceIntervalQuery.this); - this.minuendWeight = minuendWeight; - this.subtrahendWeight = subtrahendWeight; - this.scoreMode = scoreMode; - this.similarity = similarity; - this.simScorer = simScorer; - } - - @Override - public void extractTerms(Set terms) { - this.minuendWeight.extractTerms(terms); - } - - @Override - public Explanation explain(LeafReaderContext context, int doc) throws IOException { - IntervalScorer scorer = (IntervalScorer) scorer(context); - if (scorer != null) { - int newDoc = scorer.iterator().advance(doc); - if (newDoc == doc) { - return scorer.explain("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "]"); - } - } - return Explanation.noMatch("no matching intervals"); - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - Scorer minuendScorer = minuendWeight.scorer(context); - Scorer subtrahendScorer = subtrahendWeight.scorer(context); - if (subtrahendScorer == null || minuendScorer == null) - return minuendScorer; - - IntervalIterator minuendIt = minuendScorer.intervals(field); - IntervalIterator subtrahendIt = subtrahendScorer.intervals(field); - if (subtrahendIt == IntervalIterator.EMPTY || subtrahendIt == null) - return minuendScorer; - - LeafSimScorer leafScorer = simScorer == null ? 
null - : new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE); - - return new IntervalScorer(this, field, minuendScorer.iterator(), function.apply(minuendIt, subtrahendIt), leafScorer){ - @Override - public TwoPhaseIterator twoPhaseIterator() { - return new TwoPhaseIterator(approximation) { - @Override - public boolean matches() throws IOException { - if (subtrahendScorer.docID() < approximation.docID()) { - subtrahendScorer.iterator().advance(approximation.docID()); - } - return intervals.reset(approximation.docID()) && intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; - } - - @Override - public float matchCost() { - return 0; - } - }; - } - }; - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - return minuendWeight.isCacheable(ctx) && subtrahendWeight.isCacheable(ctx); - } - } -} diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalsSource.java new file mode 100644 index 000000000000..d26217729026 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalsSource.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Objects; +import java.util.Set; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; + +class DifferenceIntervalsSource extends IntervalsSource { + + final IntervalsSource minuend; + final IntervalsSource subtrahend; + final DifferenceIntervalFunction function; + + public DifferenceIntervalsSource(IntervalsSource minuend, IntervalsSource subtrahend, DifferenceIntervalFunction function) { + this.minuend = minuend; + this.subtrahend = subtrahend; + this.function = function; + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + IntervalIterator minIt = minuend.intervals(field, ctx); + if (minIt == null) + return null; + IntervalIterator subIt = subtrahend.intervals(field, ctx); + if (subIt == null) + return minIt; + return function.apply(minIt, subIt); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DifferenceIntervalsSource that = (DifferenceIntervalsSource) o; + return Objects.equals(minuend, that.minuend) && + Objects.equals(subtrahend, that.subtrahend) && + Objects.equals(function, that.function); + } + + @Override + public int hashCode() { + return Objects.hash(minuend, subtrahend, function); + } + + @Override + public String toString() { + return function + "(" + minuend + ", " + subtrahend + ")"; + } + + @Override + public void extractTerms(String field, Set terms) { + minuend.extractTerms(field, terms); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index fac9418010f4..aba5dff908a6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -46,9 +46,14 @@ public class DisiWrapper { public int lastApproxMatchDoc; // last doc of approximation that did match public int lastApproxNonMatchDoc; // last doc of approximation that did not match + // For IntervalIterators + // TODO clean this up! + public final IntervalIterator intervals; + public DisiWrapper(Scorer scorer) { this.scorer = scorer; this.spans = null; + this.intervals = null; this.iterator = scorer.iterator(); this.cost = iterator.cost(); this.doc = -1; @@ -66,6 +71,7 @@ public DisiWrapper(Scorer scorer) { public DisiWrapper(Spans spans) { this.scorer = null; this.spans = spans; + this.intervals = null; this.iterator = spans; this.cost = iterator.cost(); this.doc = -1; @@ -81,5 +87,17 @@ public DisiWrapper(Spans spans) { this.lastApproxNonMatchDoc = -2; this.lastApproxMatchDoc = -2; } + + public DisiWrapper(IntervalIterator iterator) { + this.scorer = null; + this.spans = null; + this.intervals = iterator; + this.iterator = iterator.approximation(); + this.cost = iterator.approximation().cost(); + this.doc = -1; + this.twoPhaseView = null; + this.approximation = iterator.approximation(); + this.matchCost = iterator.cost(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java deleted file mode 100644 index ce3ff22b41b2..000000000000 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.search; - -import java.io.IOException; - -import org.apache.lucene.util.PriorityQueue; - -/** - * Implements the minimum-interval OR algorithm - */ -abstract class DisjunctionIntervalIterator implements IntervalIterator { - - protected final PriorityQueue queue; - - IntervalIterator current; - - DisjunctionIntervalIterator(int iteratorCount) { - this.queue = new PriorityQueue(iteratorCount) { - @Override - protected boolean lessThan(IntervalIterator a, IntervalIterator b) { - return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); - } - }; - } - - @Override - public int start() { - return current.start(); - } - - @Override - public int end() { - return current.end(); - } - - @Override - public int innerWidth() { - return current.innerWidth(); - } - - /** - * Called to repopulate the interval priority queue when moving to a new document - */ - protected abstract void fillQueue(int doc) throws IOException; - - @Override - public boolean reset(int doc) throws IOException { - queue.clear(); - fillQueue(doc); - current = null; - return queue.size() > 0; - } - - @Override - public int nextInterval() throws IOException { - if (current == null) { - current = queue.top(); - return current.start(); - } - int start = current.start(), end = current.end(); - while (queue.size() > 0 && contains(queue.top(), start, end)) { - IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { - queue.add(it); - } - } - if (queue.size() == 0) { - current = 
IntervalIterator.EMPTY; - return IntervalIterator.NO_MORE_INTERVALS; - } - current = queue.top(); - return current.start(); - } - - private boolean contains(IntervalIterator it, int start, int end) { - return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); - } - -} diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java new file mode 100644 index 000000000000..053ddd4a02f5 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.util.PriorityQueue; + +class DisjunctionIntervalsSource extends IntervalsSource { + + final List subSources; + + public DisjunctionIntervalsSource(List subSources) { + this.subSources = subSources; + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + List subIterators = new ArrayList<>(); + for (IntervalsSource subSource : subSources) { + IntervalIterator it = subSource.intervals(field, ctx); + if (it != null) { + subIterators.add(it); + } + } + if (subIterators.size() == 0) + return null; + return new DisjunctionIntervalIterator(subIterators); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DisjunctionIntervalsSource that = (DisjunctionIntervalsSource) o; + return Objects.equals(subSources, that.subSources); + } + + @Override + public int hashCode() { + return Objects.hash(subSources); + } + + @Override + public String toString() { + return subSources.stream().map(Object::toString).collect(Collectors.joining(",", "or(", ")")); + } + + @Override + public void extractTerms(String field, Set terms) { + for (IntervalsSource source : subSources) { + source.extractTerms(field, terms); + } + } + + private static class DisjunctionIntervalIterator implements IntervalIterator { + + final PriorityQueue intervalQueue; + final DisiPriorityQueue disiQueue; + final DisjunctionDISIApproximation approximation; + final List iterators; + final float matchCost; + + IntervalIterator current; + + DisjunctionIntervalIterator(List iterators) { + this.iterators = iterators; + this.intervalQueue = new 
PriorityQueue(iterators.size()) { + @Override + protected boolean lessThan(IntervalIterator a, IntervalIterator b) { + return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); + } + }; + this.disiQueue = new DisiPriorityQueue(iterators.size()); + float costsum = 0; + for (IntervalIterator it : iterators) { + this.disiQueue.add(new DisiWrapper(it)); + costsum += it.cost(); + } + this.matchCost = costsum; + this.approximation = new DisjunctionDISIApproximation(this.disiQueue); + } + + @Override + public DocIdSetIterator approximation() { + return approximation; + } + + @Override + public float cost() { + return matchCost; + } + + @Override + public int start() { + return current.start(); + } + + @Override + public int end() { + return current.end(); + } + + @Override + public int innerWidth() { + return current.innerWidth(); + } + + @Override + public boolean advanceTo(int doc) throws IOException { + intervalQueue.clear(); + int approxDoc = this.approximation.docID(); + if (approxDoc > doc || (approxDoc != doc && this.approximation.advance(doc) != doc)) { + return false; + } + for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { + IntervalIterator it = dw.intervals; + if (it.advanceTo(doc)) { + it.nextInterval(); + intervalQueue.add(it); + } + } + current = null; + return intervalQueue.size() > 0; + } + + @Override + public int nextInterval() throws IOException { + if (current == null) { + current = intervalQueue.top(); + return current.start(); + } + int start = current.start(), end = current.end(); + while (intervalQueue.size() > 0 && contains(intervalQueue.top(), start, end)) { + IntervalIterator it = intervalQueue.pop(); + if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { + intervalQueue.add(it); + } + } + if (intervalQueue.size() == 0) { + current = null; + return IntervalIterator.NO_MORE_INTERVALS; + } + current = intervalQueue.top(); + return current.start(); + } + + private boolean 
contains(IntervalIterator it, int start, int end) { + return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); + } + + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index 47d37af01f3a..147b993f2d9f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -20,10 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; -import java.util.IdentityHashMap; import java.util.List; -import java.util.Map; import org.apache.lucene.util.PriorityQueue; @@ -130,13 +127,18 @@ public boolean matches() throws IOException { // implicitly verified, move it to verifiedMatches w.next = verifiedMatches; verifiedMatches = w; + + if (needsScores == false) { + // we can stop here + return true; + } } else { unverifiedMatches.add(w); } w = next; } - if (verifiedMatches != null || needsScores == false) { + if (verifiedMatches != null) { return true; } @@ -178,30 +180,6 @@ public final float score() throws IOException { return score(getSubMatches()); } - @Override - public IntervalIterator intervals(String field) { - Map subIntervals = new IdentityHashMap<>(); - for (DisiWrapper dw : subScorers) { - IntervalIterator subIt = dw.scorer.intervals(field); - if (subIt != null) - subIntervals.put(dw, subIt); - } - if (subIntervals.size() == 0) - return null; - return new DisjunctionIntervalIterator(subIntervals.size()) { - @Override - protected void fillQueue(int doc) throws IOException { - for (DisiWrapper dw = getSubMatches(); dw != null; dw = dw.next) { - IntervalIterator it = subIntervals.get(dw); - if (it.reset(doc)) { - it.nextInterval(); - queue.add(it); - } - } - } - }; - } - /** Compute the score for the given linked list of scorers. 
*/ protected abstract float score(DisiWrapper topList) throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index c69f175505fc..d7c4f9f6e2b8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -38,7 +38,6 @@ public PostingsAndPosition(PostingsEnum postings, int offset) { private final DocIdSetIterator conjunction; private final PostingsAndPosition[] postings; - private final String field; private int freq; @@ -47,17 +46,13 @@ public PostingsAndPosition(PostingsEnum postings, int offset) { private float matchCost; private float minCompetitiveScore; - private final IntervalIterator intervals; - - ExactPhraseScorer(Weight weight, String field, PhraseQuery.PostingsAndFreq[] postings, + ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, LeafSimScorer docScorer, ScoreMode scoreMode, float matchCost) throws IOException { super(weight); this.docScorer = docScorer; this.needsScores = scoreMode.needsScores(); this.needsTotalHitCount = scoreMode != ScoreMode.TOP_SCORES; - this.field = field; - this.intervals = new ExactPhraseIntervals(); List iterators = new ArrayList<>(); List postingsAndPositions = new ArrayList<>(); @@ -91,9 +86,7 @@ public boolean matches() throws IOException { return false; } } - freq = -1; - intervals.reset(docID()); - return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; + return phraseFreq() > 0; } @Override @@ -113,8 +106,7 @@ public String toString() { return "ExactPhraseScorer(" + weight + ")"; } - final int freq() throws IOException { - ensureFreq(); + final int freq() { return freq; } @@ -125,7 +117,6 @@ public int docID() { @Override public float score() throws IOException { - ensureFreq(); return docScorer.score(docID(), freq); } @@ -134,22 +125,6 @@ public float getMaxScore(int 
upTo) throws IOException { return docScorer.maxScore(); } - @Override - public IntervalIterator intervals(String field) { - if (this.field.equals(field) == false) - return null; - return new CachedIntervalIterator(intervals, this); - } - - private void ensureFreq() throws IOException { - if (freq == -1) { - freq = 1; - while (intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { - freq++; - } - } - } - /** Advance the given pos enum to the first doc on or after {@code target}. * Return {@code false} if the enum was exhausted before reaching * {@code target} and {@code true} otherwise. */ @@ -165,67 +140,52 @@ private static boolean advancePosition(PostingsAndPosition posting, int target) return true; } - private class ExactPhraseIntervals implements IntervalIterator { - - @Override - public int start() { - return postings[0].pos; + private int phraseFreq() throws IOException { + // reset state + final PostingsAndPosition[] postings = this.postings; + for (PostingsAndPosition posting : postings) { + posting.freq = posting.postings.freq(); + posting.pos = posting.postings.nextPosition(); + posting.upTo = 1; } - @Override - public int end() { - return postings[postings.length - 1].pos; - } + int freq = 0; + final PostingsAndPosition lead = postings[0]; - @Override - public int innerWidth() { - return 0; - } + advanceHead: + while (true) { + final int phrasePos = lead.pos - lead.offset; + for (int j = 1; j < postings.length; ++j) { + final PostingsAndPosition posting = postings[j]; + final int expectedPos = phrasePos + posting.offset; - @Override - public boolean reset(int doc) throws IOException { - if (conjunction.docID() != doc) - return false; - for (PostingsAndPosition posting : postings) { - posting.freq = posting.postings.freq(); - posting.pos = -1; - posting.upTo = 0; + // advance up to the same position as the lead + if (advancePosition(posting, expectedPos) == false) { + break advanceHead; + } + + if (posting.pos != expectedPos) { // we advanced 
too far + if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) { + continue advanceHead; + } else { + break advanceHead; + } + } } - return true; - } - @Override - public int nextInterval() throws IOException { - final PostingsAndPosition lead = postings[0]; - if (lead.upTo == lead.freq) - return IntervalIterator.NO_MORE_INTERVALS; + freq += 1; + if (needsScores == false) { + break; + } + if (lead.upTo == lead.freq) { + break; + } lead.pos = lead.postings.nextPosition(); lead.upTo += 1; - - advanceHead: - while (true) { - final int phrasePos = lead.pos - lead.offset; - for (int j = 1; j < postings.length; ++j) { - final PostingsAndPosition posting = postings[j]; - final int expectedPos = phrasePos + posting.offset; - - // advance up to the same position as the lead - if (advancePosition(posting, expectedPos) == false) { - return IntervalIterator.NO_MORE_INTERVALS; - } - - if (posting.pos != expectedPos) { // we advanced too far - if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) { - continue advanceHead; - } else { - return IntervalIterator.NO_MORE_INTERVALS; - } - } - } - return lead.pos; - } } + + return this.freq = freq; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java index 1fcac3a05107..c8b34381b2b2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java @@ -45,11 +45,6 @@ public float getMaxScore(int upTo) throws IOException { return Float.POSITIVE_INFINITY; } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } - @Override public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java index 
4ca9f6e01aa3..358aee4997ac 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java @@ -43,8 +43,13 @@ public int innerWidth() { } @Override - public boolean reset(int doc) throws IOException { - return in.reset(doc); + public DocIdSetIterator approximation() { + return in.approximation(); + } + + @Override + public boolean advanceTo(int doc) throws IOException { + return in.advanceTo(doc); } @Override @@ -56,4 +61,9 @@ public int nextInterval() throws IOException { public float score() { return in.score(); } + + @Override + public float cost() { + return in.cost(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java index 6de7e107300e..7bcb1ce4a64b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java @@ -67,11 +67,6 @@ public final int docID() { return in.docID(); } - @Override - public IntervalIterator intervals(String field) { - return in.intervals(field); - } - @Override public final DocIdSetIterator iterator() { return in.iterator(); diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index 9011183f7aca..da5ed036ddc0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -36,7 +36,6 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; @@ -698,7 +697,7 @@ public Weight createNormalizedWeight(Query query, 
ScoreMode scoreMode) throws IO public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException { final QueryCache queryCache = this.queryCache; Weight weight = query.createWeight(this, scoreMode, boost); - if (scoreMode.minRequiredPostings() == PostingsEnum.NONE && queryCache != null) { + if (scoreMode.needsScores() == false && queryCache != null) { weight = queryCache.doCache(weight, queryCachingPolicy); } return weight; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index deb842cb1a49..b968f88ffd96 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -22,7 +22,7 @@ /** * Wraps an {@link IntervalIterator} and passes through those intervals that match the {@link #accept()} function */ -public abstract class IntervalFilter implements IntervalIterator { +public abstract class IntervalFilter extends FilterIntervalIterator { /** * Filter an {@link IntervalIterator} by its outer width, ie the distance between the @@ -52,13 +52,11 @@ protected boolean accept() { }; } - private final IntervalIterator in; - /** * Create a new filter */ public IntervalFilter(IntervalIterator in) { - this.in = in; + super(in); } /** @@ -76,23 +74,4 @@ public final int nextInterval() throws IOException { return next; } - @Override - public final int start() { - return in.start(); - } - - @Override - public final int end() { - return in.end(); - } - - @Override - public int innerWidth() { - return in.innerWidth(); - } - - @Override - public boolean reset(int doc) throws IOException { - return in.reset(doc); - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 0a47ca1483fe..ff2a338bae8a 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -98,15 +98,13 @@ public int hashCode() { private static IntervalIterator orderedIntervalIterator(List subIterators) { for (IntervalIterator it : subIterators) { - if (it == IntervalIterator.EMPTY) - return IntervalIterator.EMPTY; + if (it == null) + return null; } return new OrderedIntervalIterator(subIterators); } - private static class OrderedIntervalIterator implements IntervalIterator { - - final List subIntervals; + private static class OrderedIntervalIterator extends ConjunctionIntervalIterator { int start; int end; @@ -114,7 +112,7 @@ private static class OrderedIntervalIterator implements IntervalIterator { int i; private OrderedIntervalIterator(List subIntervals) { - this.subIntervals = subIntervals; + super(subIntervals); } @Override @@ -133,15 +131,10 @@ public int innerWidth() { } @Override - public boolean reset(int doc) throws IOException { - boolean positioned = true; - for (IntervalIterator it : subIntervals) { - positioned &= it.reset(doc); - } - subIntervals.get(0).nextInterval(); + public void reset() throws IOException { + subIterators.get(0).nextInterval(); i = 1; start = end = innerWidth = Integer.MIN_VALUE; - return positioned; } @Override @@ -150,23 +143,23 @@ public int nextInterval() throws IOException { int b = Integer.MAX_VALUE; while (true) { while (true) { - if (subIntervals.get(i - 1).end() >= b) + if (subIterators.get(i - 1).end() >= b) return start; - if (i == subIntervals.size() || subIntervals.get(i).start() > subIntervals.get(i - 1).end()) + if (i == subIterators.size() || subIterators.get(i).start() > subIterators.get(i - 1).end()) break; do { - if (subIntervals.get(i).end() >= b || subIntervals.get(i).nextInterval() == NO_MORE_INTERVALS) + if (subIterators.get(i).end() >= b || subIterators.get(i).nextInterval() == NO_MORE_INTERVALS) return start; } - while 
(subIntervals.get(i).start() <= subIntervals.get(i - 1).end()); + while (subIterators.get(i).start() <= subIterators.get(i - 1).end()); i++; } - start = subIntervals.get(0).start(); - end = subIntervals.get(subIntervals.size() - 1).end(); - b = subIntervals.get(subIntervals.size() - 1).start(); - innerWidth = b - subIntervals.get(0).end() - 1; + start = subIterators.get(0).start(); + end = subIterators.get(subIterators.size() - 1).end(); + b = subIterators.get(subIterators.size() - 1).start(); + innerWidth = b - subIterators.get(0).end() - 1; i = 1; - if (subIntervals.get(0).nextInterval() == NO_MORE_INTERVALS) + if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) return start; } } @@ -227,14 +220,10 @@ public int hashCode() { } private static IntervalIterator unorderedIntervalIterator(List subIntervals) { - for (IntervalIterator it : subIntervals) { - if (it == IntervalIterator.EMPTY) - return IntervalIterator.EMPTY; - } return new UnorderedIntervalIterator(subIntervals); } - private static class UnorderedIntervalIterator implements IntervalIterator { + private static class UnorderedIntervalIterator extends ConjunctionIntervalIterator { private final PriorityQueue queue; private final IntervalIterator[] subIterators; @@ -242,6 +231,7 @@ private static class UnorderedIntervalIterator implements IntervalIterator { int start, end, innerStart, innerEnd, queueEnd; UnorderedIntervalIterator(List subIterators) { + super(subIterators); this.queue = new PriorityQueue(subIterators.size()) { @Override protected boolean lessThan(IntervalIterator a, IntervalIterator b) { @@ -271,24 +261,17 @@ public int innerWidth() { } @Override - public boolean reset(int doc) throws IOException { + public void reset() throws IOException { this.queue.clear(); this.queueEnd = start = end = innerEnd = innerStart = -1; - boolean positioned = true; for (IntervalIterator subIterator : subIterators) { - if (subIterator.reset(doc)) { - subIterator.nextInterval(); - queue.add(subIterator); - 
if (subIterator.end() > queueEnd) { - queueEnd = subIterator.end(); - innerEnd = subIterator.start(); - } - } - else { - positioned = false; + subIterator.nextInterval(); + queue.add(subIterator); + if (subIterator.end() > queueEnd) { + queueEnd = subIterator.end(); + innerEnd = subIterator.start(); } } - return positioned; } void updateRightExtreme(IntervalIterator it) { @@ -337,7 +320,7 @@ public IntervalIterator apply(List iterators) { throw new IllegalStateException("CONTAINING function requires two iterators"); IntervalIterator a = iterators.get(0); IntervalIterator b = iterators.get(1); - return new IntervalIterator() { + return new ConjunctionIntervalIterator(iterators) { boolean bpos; @@ -357,9 +340,8 @@ public int innerWidth() { } @Override - public boolean reset(int doc) throws IOException { - bpos = b.reset(doc); - return a.reset(doc); + public void reset() { + bpos = true; } @Override @@ -390,7 +372,7 @@ public IntervalIterator apply(List iterators) { throw new IllegalStateException("CONTAINED_BY function requires two iterators"); IntervalIterator a = iterators.get(0); IntervalIterator b = iterators.get(1); - return new IntervalIterator() { + return new ConjunctionIntervalIterator(iterators) { boolean bpos; @@ -410,9 +392,8 @@ public int innerWidth() { } @Override - public boolean reset(int doc) throws IOException { - bpos = b.reset(doc); - return a.reset(doc); + public void reset() throws IOException { + bpos = true; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index 22b6c043ce4f..d33a47530031 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -31,6 +31,10 @@ public interface IntervalIterator { */ int NO_MORE_INTERVALS = Integer.MAX_VALUE; + DocIdSetIterator approximation(); + + boolean advanceTo(int doc) throws IOException; + /** 
* The start of the current interval */ @@ -46,13 +50,6 @@ public interface IntervalIterator { */ int innerWidth(); - /** - * Called to reset the iterator on a new document - * - * @return true if the iterator's parent Scorer is positioned on the given doc id - */ - boolean reset(int doc) throws IOException; - /** * Advance the iterator to the next interval * @@ -68,36 +65,6 @@ default float score() { return (float) (1.0 / (1 + innerWidth())); } - /** - * An empty iterator that always returns {@code false} from {@link #reset(int)} and - * {@link IntervalIterator#NO_MORE_INTERVALS} from {@link #nextInterval()} - */ - IntervalIterator EMPTY = new IntervalIterator() { - - @Override - public int start() { - return -1; - } - - @Override - public int end() { - return -1; - } - - @Override - public int innerWidth() { - return 0; - } - - @Override - public boolean reset(int doc) { - return false; - } - - @Override - public int nextInterval() { - return NO_MORE_INTERVALS; - } - }; + float cost(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index 3c2683cb6046..1130d327613a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -33,24 +33,21 @@ /** * A query that retrieves documents containing intervals returned from an - * {@link IntervalFunction} over a set of subqueries + * {@link IntervalsSource} */ public final class IntervalQuery extends Query { private final String field; - private final List subQueries; - private final IntervalFunction iteratorFunction; + private final IntervalsSource intervalsSource; /** * Create a new IntervalQuery * @param field the field to query - * @param subQueries the subqueries to generate intervals from - * @param iteratorFunction an {@link IntervalFunction} to combine the intervals from the subqueries + * @param intervalsSource an {@link 
IntervalsSource} to retrieve intervals from */ - public IntervalQuery(String field, List subQueries, IntervalFunction iteratorFunction) { + public IntervalQuery(String field, IntervalsSource intervalsSource) { this.field = field; - this.subQueries = subQueries; - this.iteratorFunction = iteratorFunction; + this.intervalsSource = intervalsSource; } public String getField() { @@ -59,25 +56,18 @@ public String getField() { @Override public String toString(String field) { - return iteratorFunction.toString() + subQueries.stream().map(Object::toString) - .collect(Collectors.joining(",", "(", ")")); + return intervalsSource.toString(); } @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - List subWeights = new ArrayList<>(); - for (Query q : subQueries) { - subWeights.add(searcher.createWeight(q, ScoreMode.COMPLETE_POSITIONS, boost)); - } - return new IntervalWeight(this, subWeights, scoreMode.needsScores() ? buildSimScorer(field, searcher, subWeights, boost) : null, + return new IntervalWeight(this, scoreMode.needsScores() ? 
buildSimScorer(searcher, boost) : null, searcher.getSimilarity(), scoreMode); } - static Similarity.SimScorer buildSimScorer(String field, IndexSearcher searcher, List subWeights, float boost) throws IOException { + private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, float boost) throws IOException { Set terms = new HashSet<>(); - for (Weight w : subWeights) { - w.extractTerms(terms); - } + intervalsSource.extractTerms(field, terms); TermStatistics[] termStats = new TermStatistics[terms.size()]; int termUpTo = 0; for (Term term : terms) { @@ -96,25 +86,22 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; IntervalQuery that = (IntervalQuery) o; return Objects.equals(field, that.field) && - Objects.equals(subQueries, that.subQueries) && - Objects.equals(iteratorFunction, that.iteratorFunction); + Objects.equals(intervalsSource, that.intervalsSource); } @Override public int hashCode() { - return Objects.hash(field, subQueries, iteratorFunction); + return Objects.hash(field, intervalsSource); } private class IntervalWeight extends Weight { - final List subWeights; final Similarity.SimScorer simScorer; final Similarity similarity; final ScoreMode scoreMode; - public IntervalWeight(Query query, List subWeights, Similarity.SimScorer simScorer, Similarity similarity, ScoreMode scoreMode) { + public IntervalWeight(Query query, Similarity.SimScorer simScorer, Similarity similarity, ScoreMode scoreMode) { super(query); - this.subWeights = subWeights; this.simScorer = simScorer; this.similarity = similarity; this.scoreMode = scoreMode; @@ -122,9 +109,7 @@ public IntervalWeight(Query query, List subWeights, Similarity.SimScorer @Override public void extractTerms(Set terms) { - for (Weight w : subWeights) { - w.extractTerms(terms); - } + intervalsSource.extractTerms(field, terms); } @Override @@ -141,30 +126,16 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio @Override public Scorer 
scorer(LeafReaderContext context) throws IOException { - List subIntervals = new ArrayList<>(); - List disis = new ArrayList<>(); - for (Weight w : subWeights) { - Scorer scorer = w.scorer(context); - if (scorer == null) - return null; - disis.add(scorer.iterator()); - IntervalIterator it = scorer.intervals(field); - if (it == null) - return null; - subIntervals.add(it); - } - IntervalIterator intervals = IntervalQuery.this.iteratorFunction.apply(subIntervals); + IntervalIterator intervals = intervalsSource.intervals(field, context); + if (intervals == null) + return null; LeafSimScorer leafScorer = simScorer == null ? null : new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE); - return new IntervalScorer(this, field, ConjunctionDISI.intersectIterators(disis), intervals, leafScorer); + return new IntervalScorer(this, intervals, leafScorer); } @Override public boolean isCacheable(LeafReaderContext ctx) { - for (Weight w : subWeights) { - if (w.isCacheable(ctx) == false) - return false; - } return true; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index 8d15dcf626fa..d70e93d7705c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -21,21 +21,18 @@ class IntervalScorer extends Scorer { - protected final IntervalIterator intervals; - private final String field; - protected final DocIdSetIterator approximation; + private final IntervalIterator intervals; + private final DocIdSetIterator approximation; private final LeafSimScorer simScorer; private float freq = -1; private int lastScoredDoc = -1; - protected IntervalScorer(Weight weight, String field, DocIdSetIterator approximation, - IntervalIterator intervals, LeafSimScorer simScorer) { + protected IntervalScorer(Weight weight, IntervalIterator intervals, LeafSimScorer 
simScorer) { super(weight); this.intervals = intervals; - this.approximation = approximation; + this.approximation = intervals.approximation(); this.simScorer = simScorer; - this.field = field; } @Override @@ -74,13 +71,6 @@ private void ensureFreq() throws IOException { } } - @Override - public IntervalIterator intervals(String field) { - if (this.field.equals(field)) - return new CachedIntervalIterator(intervals, this); - return null; - } - @Override public DocIdSetIterator iterator() { return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); @@ -91,12 +81,12 @@ public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { - return intervals.reset(approximation.docID()) && intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; + return intervals.advanceTo(docID()) && intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; } @Override public float matchCost() { - return 0; + return intervals.cost(); } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index bc0455289240..175b50762316 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -19,6 +19,8 @@ import java.util.Arrays; +import org.apache.lucene.util.BytesRef; + /** * Constructor functions for interval-based queries * @@ -31,189 +33,185 @@ public final class Intervals { private Intervals() {} + public static IntervalsSource term(BytesRef term) { + return new TermIntervalsSource(term); + } + + public static IntervalsSource term(String term) { + return new TermIntervalsSource(new BytesRef(term)); + } + + public static IntervalsSource phrase(String... 
terms) { + IntervalsSource[] sources = new IntervalsSource[terms.length]; + int i = 0; + for (String term : terms) { + sources[i] = term(term); + i++; + } + return orderedNear(0, sources); + } + + public static IntervalsSource or(IntervalsSource... subSources) { + if (subSources.length == 1) + return subSources[0]; + return new DisjunctionIntervalsSource(Arrays.asList(subSources)); + } + /** - * Create an ordered query with a maximum width + * Create an ordered {@link IntervalsSource} with a maximum width * - * Matches documents in which the subqueries all match in the given order, and - * in which the width of the interval over which the queries match is less than + * Returns intervals in which the subsources all appear in the given order, and + * in which the width of the interval over which the subsources appear is less than * the defined width * - * @param field the field to query * @param width the maximum width of subquery-spanning intervals that will match - * @param subQueries an ordered set of subqueries + * @param subSources an ordered set of {@link IntervalsSource} objects */ - public static Query orderedQuery(String field, int width, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(0, width)); + public static IntervalsSource orderedNear(int width, IntervalsSource... 
subSources) { + return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.OrderedNearFunction(0, width)); } /** - * Create an ordered query with a defined width range + * Create an ordered {@link IntervalsSource} with a defined width range * - * Matches documents in which the subqueries all match in the given order, and in - * which the width of the interval over which the queries match is between the + * Returns intervals in which the subsources all appear in the given order, and in + * which the width of the interval over which the subsources appear is between the * minimum and maximum defined widths * - * @param field the field to query * @param minWidth the minimum width of subquery-spanning intervals that will match * @param maxWidth the maximum width of subquery-spanning intervals that will match - * @param subQueries an ordered set of subqueries + * @param subSources an ordered set of {@link IntervalsSource} objects */ - public static Query orderedQuery(String field, int minWidth, int maxWidth, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(minWidth, maxWidth)); + public static IntervalsSource orderedNear(int minWidth, int maxWidth, IntervalsSource... subSources) { + return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.OrderedNearFunction(minWidth, maxWidth)); } /** - * Create an ordered query with an unbounded width range + * Create an ordered {@link IntervalsSource} with an unbounded width range * - * Matches documents in which the subqueries all match in the given order + * Returns intervals in which the subsources all appear in the given order * - * @param field the field to query - * @param subQueries an ordered set of subqueries + * @param subSources an ordered set of {@link IntervalsSource} objects */ - public static Query orderedQuery(String field, Query... 
subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.ORDERED); + public static IntervalsSource ordered(IntervalsSource... subSources) { + return new ConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.ORDERED); } /** - * Create an unordered query with a maximum width + * Create an unordered {@link IntervalsSource} with a maximum width * - * Matches documents in which the subqueries all match in any order, and in which - * the width of the interval over which the queries match is less than the + * Returns intervals in which the subsources all appear in any order, and in which + * the width of the interval over which the subsources appear is less than the * defined width * - * @param field the field to query * @param width the maximum width of subquery-spanning intervals that will match - * @param subQueries an unordered set of queries + * @param subSources an unordered set of queries */ - public static Query unorderedQuery(String field, int width, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(0, width)); + public static IntervalsSource unorderedNear(int width, IntervalsSource... 
subSources) { + return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.UnorderedNearFunction(0, width)); } /** - * Create an unordered query with a defined width range + * Create an unordered {@link IntervalsSource} with a defined width range * - * Matches documents in which the subqueries all match in any order, and in which - * the width of the interval over which the queries match is between the minimum + * Returns intervals in which the subsources all appear in any order, and in which + * the width of the interval over which the subsources appear is between the minimum * and maximum defined widths * - * @param field the field to query * @param minWidth the minimum width of subquery-spanning intervals that will match * @param maxWidth the maximum width of subquery-spanning intervals that will match - * @param subQueries an unordered set of queries + * @param subSources an unordered set of subsources */ - public static Query unorderedQuery(String field, int minWidth, int maxWidth, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(minWidth, maxWidth)); + public static IntervalsSource unorderedNear(int minWidth, int maxWidth, IntervalsSource... subSources) { + return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.UnorderedNearFunction(minWidth, maxWidth)); } /** - * Create an unordered query with an unbounded width range + * Create an unordered {@link IntervalsSource} with an unbounded width range * - * Matches documents in which all the subqueries match. This is essence a pure conjunction - * query, but it will expose iterators over those conjunctions that may then be further - * nested in other interval queries + * Returns intervals in which all the subsources appear. 
* - * @param field the field to query - * @param subQueries an unordered set of queries + * @param subSources an unordered set of queries */ - public static Query unorderedQuery(String field, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.UNORDERED); + public static IntervalsSource unordered(IntervalsSource... subSources) { + return new ConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.UNORDERED); } /** - * Create a non-overlapping query + * Create a non-overlapping IntervalsSource * - * Matches documents that match the minuend query, except when the intervals of the minuend - * query overlap with intervals from the subtrahend query - * - * Exposes matching intervals from the minuend - * - * @param field the field to query - * @param minuend the query to filter - * @param subtrahend the query to filter by + * Returns intervals of the minuend that do not overlap with intervals from the subtrahend + + * @param minuend the {@link IntervalsSource} to filter + * @param subtrahend the {@link IntervalsSource} to filter by */ - public static Query nonOverlappingQuery(String field, Query minuend, Query subtrahend) { - return new DifferenceIntervalQuery(field, minuend, subtrahend, DifferenceIntervalFunction.NON_OVERLAPPING); + public static IntervalsSource nonOverlapping(IntervalsSource minuend, IntervalsSource subtrahend) { + return new DifferenceIntervalsSource(minuend, subtrahend, DifferenceIntervalFunction.NON_OVERLAPPING); } /** - * Create a not-within query - * - * Matches documents that match the minuend query, except when the intervals of the minuend - * query appear within a set number of positions of intervals from the subtrahend query + * Create a not-within {@link IntervalsSource} * - * Exposes matching intervals from the minuend + * Returns intervals of the minuend that do not appear within a set number of positions of + * intervals from the subtrahend query * - * @param field the field 
to query - * @param minuend the query to filter + * @param minuend the {@link IntervalsSource} to filter * @param positions the maximum distance that intervals from the minuend may occur from intervals * of the subtrahend - * @param subtrahend the query to filter by + * @param subtrahend the {@link IntervalsSource} to filter by */ - public static Query notWithinQuery(String field, Query minuend, int positions, Query subtrahend) { - return new DifferenceIntervalQuery(field, minuend, subtrahend, new DifferenceIntervalFunction.NotWithinFunction(positions)); + public static IntervalsSource notWithin(IntervalsSource minuend, int positions, IntervalsSource subtrahend) { + return new DifferenceIntervalsSource(minuend, subtrahend, new DifferenceIntervalFunction.NotWithinFunction(positions)); } /** - * Create a not-containing query + * Create a not-containing {@link IntervalsSource} * - * Matches documents that match the minuend query, except when the intervals of the minuend - * query are contained within an interval of the subtrahend query + * Returns intervals from the minuend that do not contain intervals of the subtrahend * - * Exposes matching intervals from the minuend - * - * @param field the field to query - * @param minuend the query to filter - * @param subtrahend the query to filter by + * @param minuend the {@link IntervalsSource} to filter + * @param subtrahend the {@link IntervalsSource} to filter by */ - public static Query notContainingQuery(String field, Query minuend, Query subtrahend) { - return new DifferenceIntervalQuery(field, minuend, subtrahend, DifferenceIntervalFunction.NOT_CONTAINING); + public static IntervalsSource notContaining(IntervalsSource minuend, IntervalsSource subtrahend) { + return new DifferenceIntervalsSource(minuend, subtrahend, DifferenceIntervalFunction.NOT_CONTAINING); } /** - * Create a containing query - * - * Matches documents where intervals of the big query contain one or more intervals from - * the small query + * Create a 
containing {@link IntervalsSource} * - * Exposes matching intervals from the big query + * Returns intervals from the big source that contain one or more intervals from + * the small source * - * @param field the field to query - * @param big the query to filter - * @param small the query to filter by + * @param big the {@link IntervalsSource} to filter + * @param small the {@link IntervalsSource} to filter by */ - public static Query containingQuery(String field, Query big, Query small) { - return new IntervalQuery(field, Arrays.asList(big, small), IntervalFunction.CONTAINING); + public static IntervalsSource containing(IntervalsSource big, IntervalsSource small) { + return new ConjunctionIntervalsSource(Arrays.asList(big, small), IntervalFunction.CONTAINING); } /** - * Create a not-contained-by query + * Create a not-contained-by {@link IntervalsSource} * - * Matches documents that match the small query, except when the intervals of the small - * query are contained within an interval of the big query + * Returns intervals from the small {@link IntervalsSource} that do not appear within + * intervals from the big {@link IntervalsSource}. 
* - * Exposes matching intervals from the small query - * - * @param field the field to query - * @param small the query to filter - * @param big the query to filter by + * @param small the {@link IntervalsSource} to filter + * @param big the {@link IntervalsSource} to filter by */ - public static Query notContainedByQuery(String field, Query small, Query big) { - return new DifferenceIntervalQuery(field, small, big, DifferenceIntervalFunction.NOT_CONTAINED_BY); + public static IntervalsSource notContainedBy(IntervalsSource small, IntervalsSource big) { + return new DifferenceIntervalsSource(small, big, DifferenceIntervalFunction.NOT_CONTAINED_BY); } /** - * Create a contained-by query - * - * Matches documents where intervals of the small query occur within intervals of the big query + * Create a contained-by {@link IntervalsSource} * - * Exposes matching intervals from the small query + * Returns intervals from the small query that appear within intervals of the big query * - * @param field the field to query - * @param small the query to filter - * @param big the query to filter by + * @param small the {@link IntervalsSource} to filter + * @param big the {@link IntervalsSource} to filter by */ - public static Query containedByQuery(String field, Query small, Query big) { - return new IntervalQuery(field, Arrays.asList(small, big), IntervalFunction.CONTAINED_BY); + public static IntervalsSource containedBy(IntervalsSource small, IntervalsSource big) { + return new ConjunctionIntervalsSource(Arrays.asList(small, big), IntervalFunction.CONTAINED_BY); } // TODO: beforeQuery, afterQuery, arbitrary IntervalFunctions diff --git a/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java similarity index 54% rename from lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java rename to lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java index 
5501cffae316..3bdf1e50655d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java @@ -18,37 +18,23 @@ package org.apache.lucene.search; import java.io.IOException; +import java.util.Set; -/** - * An interval iterator which caches its first invocation. - * - * Useful for two-phase queries that confirm matches by checking that at least one - * interval exists in a given document - */ -class CachedIntervalIterator extends FilterIntervalIterator { +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; - final Scorer scorer; +public abstract class IntervalsSource { - private boolean started = false; + public abstract IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException; - CachedIntervalIterator(IntervalIterator in, Scorer scorer) { - super(in); - this.scorer = scorer; - } + @Override + public abstract int hashCode(); @Override - public boolean reset(int doc) throws IOException { - // inner iterator already reset() in TwoPhaseIterator.matches() - started = false; - return doc == scorer.docID(); - } + public abstract boolean equals(Object other); @Override - public int nextInterval() throws IOException { - if (started == false) { - started = true; - return start(); - } - return in.nextInterval(); - } + public abstract String toString(); + + public abstract void extractTerms(String field, Set terms); } diff --git a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java index fad7d9c99989..6ffbe340144e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java @@ -20,9 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; -import java.util.IdentityHashMap; import 
java.util.List; -import java.util.Map; import java.util.stream.LongStream; import java.util.stream.StreamSupport; @@ -327,32 +325,6 @@ public float score() throws IOException { return (float) score; } - @Override - public IntervalIterator intervals(String field) { - Map its = new IdentityHashMap<>(); - for (DisiWrapper s = lead; s != null; s = s.next) { - IntervalIterator it = s.scorer.intervals(field); - if (it != null) { - its.put(s, it); - } - } - if (its.size() == 0) - return null; - return new DisjunctionIntervalIterator(its.size()) { - @Override - protected void fillQueue(int doc) throws IOException { - updateFreq(); - for (DisiWrapper s = lead; s != null; s = s.next) { - IntervalIterator it = its.get(s); - if (it.reset(doc)) { - it.nextInterval(); - queue.add(it); - } - } - } - }; - } - @Override public float getMaxScore(int upTo) throws IOException { // TODO: implement but be careful about floating-point errors. diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 7df670d73f19..65d6631e9a7c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -193,13 +193,11 @@ private class MultiPhraseWeight extends Weight { private final Similarity.SimScorer stats; private final Map termStates = new HashMap<>(); private final ScoreMode scoreMode; - private final int postingsFlags; public MultiPhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { super(MultiPhraseQuery.this); this.scoreMode = scoreMode; - this.postingsFlags = Math.max(scoreMode.minRequiredPostings(), PostingsEnum.POSITIONS); this.similarity = searcher.getSimilarity(); final IndexReaderContext context = searcher.getTopReaderContext(); @@ -267,7 +265,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { TermState termState = 
termStates.get(term).get(context); if (termState != null) { termsEnum.seekExact(term.bytes(), termState); - postings.add(termsEnum.postings(null, this.postingsFlags)); + postings.add(termsEnum.postings(null, PostingsEnum.POSITIONS)); totalMatchCost += PhraseQuery.termPositionsCost(termsEnum); } } @@ -292,11 +290,11 @@ public Scorer scorer(LeafReaderContext context) throws IOException { } if (slop == 0) { - return new ExactPhraseScorer(this, field, postingsFreqs, + return new ExactPhraseScorer(this, postingsFreqs, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE), scoreMode, totalMatchCost); } else { - return new SloppyPhraseScorer(this, field, postingsFreqs, slop, + return new SloppyPhraseScorer(this, postingsFreqs, slop, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE), scoreMode.needsScores(), totalMatchCost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java index d39cec2293e9..640cd5f20e45 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java @@ -24,8 +24,7 @@ * Position of a term in a document that takes into account the term offset within the phrase. 
*/ final class PhrasePositions { - int realPosition; // position in doc - int position; // position in phrase + int position; // position in doc int count; // remaining pos in this doc int offset; // position in phrase final int ord; // unique across all PhrasePositions instances @@ -55,8 +54,7 @@ final void firstPosition() throws IOException { */ final boolean nextPosition() throws IOException { if (count-- > 0) { // read subsequent pos's - realPosition = postings.nextPosition(); - position = realPosition - offset; + position = postings.nextPosition() - offset; return true; } else return false; diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index a4ff6150815d..ff1538820d61 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -353,7 +353,6 @@ private class PhraseWeight extends Weight { private final Similarity similarity; private final Similarity.SimScorer stats; private final ScoreMode scoreMode; - private final int postingsFlags; private transient TermStates states[]; public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) @@ -366,7 +365,6 @@ public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throw new IllegalStateException("PhraseWeight requires that the first position is 0, call rewrite first"); } this.scoreMode = scoreMode; - this.postingsFlags = Math.max(scoreMode.minRequiredPostings(), PostingsEnum.POSITIONS); this.similarity = searcher.getSimilarity(); final IndexReaderContext context = searcher.getTopReaderContext(); states = new TermStates[terms.length]; @@ -424,7 +422,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { return null; } te.seekExact(t.bytes(), state); - PostingsEnum postingsEnum = te.postings(null, postingsFlags); + PostingsEnum postingsEnum = te.postings(null, PostingsEnum.POSITIONS); 
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t); totalMatchCost += termPositionsCost(te); } @@ -435,11 +433,11 @@ public Scorer scorer(LeafReaderContext context) throws IOException { } if (slop == 0) { // optimize exact case - return new ExactPhraseScorer(this, field, postingsFreqs, + return new ExactPhraseScorer(this, postingsFreqs, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE), scoreMode, totalMatchCost); } else { - return new SloppyPhraseScorer(this, field, postingsFreqs, slop, + return new SloppyPhraseScorer(this, postingsFreqs, slop, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE), scoreMode.needsScores(), totalMatchCost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java index f4f91b22e0db..987293eb0476 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java @@ -61,18 +61,6 @@ private static boolean matchesOrNull(TwoPhaseIterator it) throws IOException { return it == null || it.matches(); } - @Override - public IntervalIterator intervals(String field) { - return new FilterIntervalIterator(reqScorer.intervals(field)) { - @Override - public boolean reset(int doc) throws IOException { - if (doc == ReqExclScorer.this.docID()) - return in.reset(doc); - return false; - } - }; - } - @Override public DocIdSetIterator iterator() { return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java index 3069cb1ec2da..6d93a54560d4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java @@ -184,26 +184,6 @@ public DocIdSetIterator iterator() { } } 
- @Override - public IntervalIterator intervals(String field) { - IntervalIterator reqIntervals = reqScorer.intervals(field); - IntervalIterator optIntervals = optScorer.intervals(field); - if (optIntervals == null) - return reqIntervals; - if (reqIntervals == null) - return optIntervals; - return new DisjunctionIntervalIterator(2) { - @Override - protected void fillQueue(int doc) throws IOException { - reqIntervals.reset(doc); - queue.add(reqIntervals); - positionOptionalScorers(); - if (optIntervals.reset(doc)) - queue.add(optIntervals); - } - }; - } - @Override public int docID() { return reqScorer.docID(); @@ -212,17 +192,9 @@ public int docID() { @Override public float score() throws IOException { // TODO: sum into a double and cast to float if we ever send required clauses to BS1 - positionOptionalScorers(); + int curDoc = reqScorer.docID(); float score = reqScorer.score(); - if (optScorer.docID() == reqScorer.docID()) { - score += optScorer.score(); - } - return score; - } - - private void positionOptionalScorers() throws IOException { - int curDoc = reqScorer.docID(); int optScorerDoc = optApproximation.docID(); if (optScorerDoc < curDoc) { optScorerDoc = optApproximation.advance(curDoc); @@ -230,6 +202,11 @@ private void positionOptionalScorers() throws IOException { optScorerDoc = optApproximation.nextDoc(); } } + if (optScorerDoc == curDoc) { + score += optScorer.score(); + } + + return score; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java index 815286adddc9..31a5d108fc37 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java @@ -16,8 +16,6 @@ */ package org.apache.lucene.search; -import org.apache.lucene.index.PostingsEnum; - /** * Different modes of search. 
*/ @@ -31,11 +29,6 @@ public enum ScoreMode { public boolean needsScores() { return true; } - - @Override - public int minRequiredPostings() { - return PostingsEnum.FREQS; - } }, /** @@ -47,26 +40,6 @@ public int minRequiredPostings() { public boolean needsScores() { return false; } - - @Override - public int minRequiredPostings() { - return PostingsEnum.NONE; - } - }, - - /** - * Produced scorers will allow visiting all matches, and expose positions - */ - COMPLETE_POSITIONS { - @Override - public boolean needsScores() { - return false; - } - - @Override - public int minRequiredPostings() { - return PostingsEnum.POSITIONS; - } }, /** @@ -78,22 +51,10 @@ public int minRequiredPostings() { public boolean needsScores() { return true; } - - @Override - public int minRequiredPostings() { - return PostingsEnum.FREQS; - } }; /** * Whether this {@link ScoreMode} needs to compute scores. */ public abstract boolean needsScores(); - - /** - * The minimum flags to be passed to {@link org.apache.lucene.index.TermsEnum#postings(PostingsEnum, int)} - */ - public abstract int minRequiredPostings(); - - } diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java index 9e1d46c8ecfc..81624ccac9d8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java @@ -125,20 +125,6 @@ public ChildScorer(Scorer child, String relationship) { */ public abstract DocIdSetIterator iterator(); - /** - * Return a {@link IntervalIterator} over matching intervals for a given field - * - * Consumers should call {@link IntervalIterator#reset(int)} when the parent - * Scorer's {@link DocIdSetIterator} has moved to a new document, and then - * iterate over the intervals by repeatedly calling {@link IntervalIterator#nextInterval()} - * until {@link IntervalIterator#NO_MORE_INTERVALS} is returned. 
- * - * @param field The field to retrieve intervals for - * @return an {@link IntervalIterator}, or {@code null} if no intervals are available - * for the given field - */ - public abstract IntervalIterator intervals(String field); - /** * Optional method: Return a {@link TwoPhaseIterator} view of this * {@link Scorer}. A return value of {@code null} indicates that @@ -192,5 +178,4 @@ public int advanceShallow(int target) throws IOException { * included and {@code upTo} included. */ public abstract float getMaxScore(int upTo) throws IOException; - } diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index de9546be3938..7587b37889b7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -32,8 +32,6 @@ final class SloppyPhraseScorer extends Scorer { private final DocIdSetIterator conjunction; private final PhrasePositions[] phrasePositions; - private final IntervalIterator intervals; - private final String field; private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq(). 
@@ -42,8 +40,6 @@ final class SloppyPhraseScorer extends Scorer { private final int slop; private final int numPostings; private final PhraseQueue pq; // for advancing min position - - private int start, currentEnd, nextEnd; private int end; // current largest phrase position @@ -57,15 +53,13 @@ final class SloppyPhraseScorer extends Scorer { final boolean needsScores; private final float matchCost; - SloppyPhraseScorer(Weight weight, String field, PhraseQuery.PostingsAndFreq[] postings, + SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, int slop, LeafSimScorer docScorer, boolean needsScores, float matchCost) { super(weight); this.docScorer = docScorer; this.needsScores = needsScores; this.slop = slop; - this.field = field; - this.intervals = new SloppyIntervalIterator(); this.numPostings = postings==null ? 0 : postings.length; pq = new PhraseQueue(postings.length); DocIdSetIterator[] iterators = new DocIdSetIterator[postings.length]; @@ -79,72 +73,61 @@ final class SloppyPhraseScorer extends Scorer { this.matchCost = matchCost; } - private class SloppyIntervalIterator implements IntervalIterator { - - @Override - public int start() { - return start; - } - - @Override - public int end() { - return currentEnd; - } - - @Override - public int innerWidth() { - return currentEnd - start; - } - - @Override - public boolean reset(int doc) throws IOException { - start = currentEnd = nextEnd = -1; - return initPhrasePositions(); + /** + * Score a candidate doc for all slop-valid position-combinations (matches) + * encountered while traversing/hopping the PhrasePositions. + *
    The score contribution of a match depends on the distance: + *
    - highest score for distance=0 (exact match). + *
    - score gets lower as distance gets higher. + *
    Example: for query "a b"~2, a document "x a b a y" can be scored twice: + * once for "a b" (distance=0), and once for "b a" (distance=2). + *
    Possibly not all valid combinations are encountered, because for efficiency + * we always propagate the least PhrasePosition. This allows to base on + * PriorityQueue and move forward faster. + * As result, for example, document "a b c b a" + * would score differently for queries "a b c"~4 and "c b a"~4, although + * they really are equivalent. + * Similarly, for doc "a b c b a f g", query "c b"~2 + * would get same score as "g f"~2, although "c b"~2 could be matched twice. + * We may want to fix this in the future (currently not, for performance reasons). + */ + private float phraseFreq() throws IOException { + if (!initPhrasePositions()) { + return 0.0f; } - - @Override - public int nextInterval() throws IOException { - if (pq.size() < phrasePositions.length) - return IntervalIterator.NO_MORE_INTERVALS; - currentEnd = nextEnd; - PhrasePositions pp = pq.pop(); - start = pp.realPosition; - int matchLength = end - pp.position; - int next = pq.top().position; - int nextStart = pq.top().realPosition; - while (advancePP(pp)) { - if (hasRpts && !advanceRpts(pp)) { - break; // pps exhausted - } - if (pp.position > next) { // done minimizing current match-length - if (matchLength <= slop) { - pq.add(pp); - if (pp.realPosition > nextEnd) - nextEnd = pp.realPosition; - return start; - } - pq.add(pp); - pp = pq.pop(); - next = pq.top().position; - matchLength = end - pp.position; - } else { - int matchLength2 = end - pp.position; - if (matchLength2 < matchLength) { - matchLength = matchLength2; - } - if (pp.realPosition > nextStart) { - start = nextStart; - } - else { - start = pp.realPosition; + float freq = 0.0f; + numMatches = 0; + PhrasePositions pp = pq.pop(); + int matchLength = end - pp.position; + int next = pq.top().position; + while (advancePP(pp)) { + if (hasRpts && !advanceRpts(pp)) { + break; // pps exhausted + } + if (pp.position > next) { // done minimizing current match-length + if (matchLength <= slop) { + freq += (1.0 / (1.0 + matchLength)); // score 
match + numMatches++; + if (!needsScores) { + return freq; } + } + pq.add(pp); + pp = pq.pop(); + next = pq.top().position; + matchLength = end - pp.position; + } else { + int matchLength2 = end - pp.position; + if (matchLength2 < matchLength) { + matchLength = matchLength2; } } - if (matchLength <= slop) { - return start; - } - return IntervalIterator.NO_MORE_INTERVALS; } + if (matchLength <= slop) { + freq += (1.0 / (1.0 + matchLength)); // score match + numMatches++; + } + return freq; } /** advance a PhrasePosition and update 'end', return false if exhausted */ @@ -259,9 +242,6 @@ private void initSimple() throws IOException { if (pp.position > end) { end = pp.position; } - if (pp.realPosition > nextEnd) { - nextEnd = pp.realPosition; - } pq.add(pp); } } @@ -291,9 +271,6 @@ private void fillQueue() { if (pp.position > end) { end = pp.position; } - if (pp.realPosition > nextEnd) { - nextEnd = pp.realPosition; - } pq.add(pp); } } @@ -538,13 +515,11 @@ private HashMap termGroups(LinkedHashMap tord, Array return tg; } - int freq() throws IOException { - ensureFreq(); + int freq() { return numMatches; } - float sloppyFreq() throws IOException { - ensureFreq(); + float sloppyFreq() { return sloppyFreq; } @@ -568,36 +543,8 @@ float sloppyFreq() throws IOException { // } // } // } - - /** - * Score a candidate doc for all slop-valid position-combinations (matches) - * encountered while traversing/hopping the PhrasePositions. - *
    The score contribution of a match depends on the distance: - *
    - highest score for distance=0 (exact match). - *
    - score gets lower as distance gets higher. - *
    Example: for query "a b"~2, a document "x a b a y" can be scored twice: - * once for "a b" (distance=0), and once for "b a" (distance=2). - *
    Possibly not all valid combinations are encountered, because for efficiency - * we always propagate the least PhrasePosition. This allows to base on - * PriorityQueue and move forward faster. - * As result, for example, document "a b c b a" - * would score differently for queries "a b c"~4 and "c b a"~4, although - * they really are equivalent. - * Similarly, for doc "a b c b a f g", query "c b"~2 - * would get same score as "g f"~2, although "c b"~2 could be matched twice. - * We may want to fix this in the future (currently not, for performance reasons). - */ - private void ensureFreq() throws IOException { - if (sloppyFreq == -1) { - numMatches = 1; - sloppyFreq = intervals.score(); - while (intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { - sloppyFreq += intervals.score(); - numMatches++; - } - } - } - + + @Override public int docID() { return conjunction.docID(); @@ -605,7 +552,6 @@ public int docID() { @Override public float score() throws IOException { - ensureFreq(); return docScorer.score(docID(), sloppyFreq); } @@ -617,21 +563,13 @@ public float getMaxScore(int upTo) throws IOException { @Override public String toString() { return "scorer(" + weight + ")"; } - @Override - public IntervalIterator intervals(String field) { - if (this.field.equals(field)) - return new CachedIntervalIterator(intervals, this); - return null; - } - @Override public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(conjunction) { @Override public boolean matches() throws IOException { - sloppyFreq = -1; - intervals.reset(docID()); - return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; + sloppyFreq = phraseFreq(); // check for phrase + return sloppyFreq != 0F; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java index 00ab66610914..2a7c450805d9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java @@ -29,7 +29,6 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; @@ -209,7 +208,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { long termMaxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); totalMaxFreq += termMaxFreq; LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, termMaxFreq); - subScorers.add(new TermScorer(this, terms[i].field(), termsEnum, ScoreMode.COMPLETE, simScorer)); + subScorers.add(new TermScorer(this, termsEnum, ScoreMode.COMPLETE, simScorer)); } } if (subScorers.isEmpty()) { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java new file mode 100644 index 000000000000..c60af1273b3f --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Objects; +import java.util.Set; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.BytesRef; + +class TermIntervalsSource extends IntervalsSource { + + final BytesRef term; + + TermIntervalsSource(BytesRef term) { + this.term = term; + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + Terms terms = ctx.reader().terms(field); + if (terms == null) + return null; + TermsEnum te = terms.iterator(); + te.seekExact(term); + PostingsEnum pe = te.postings(null, PostingsEnum.POSITIONS); + float cost = PhraseQuery.termPositionsCost(te); + return new IntervalIterator() { + + int pos, upto; + + @Override + public DocIdSetIterator approximation() { + return pe; + } + + @Override + public boolean advanceTo(int doc) throws IOException { + pos = -1; + if (pe.docID() > doc || (pe.docID() != doc && pe.advance(doc) != doc)) { + upto = -1; + return false; + } + else { + upto = pe.freq(); + return true; + } + } + + @Override + public int start() { + return pos; + } + + @Override + public int end() { + return pos; + } + + @Override + public int innerWidth() { + return 1; + } + + @Override + public int nextInterval() throws IOException { + if (upto <= 0) + return pos = NO_MORE_INTERVALS; + upto--; + return pos = pe.nextPosition(); + } + + @Override + public float cost() { + return cost; + } + + @Override + public String toString() { + return pe.docID() + ":" + pos; + } + }; + } + + @Override + public int hashCode() { + return Objects.hash(term); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o 
== null || getClass() != o.getClass()) return false; + TermIntervalsSource that = (TermIntervalsSource) o; + return Objects.equals(term, that.term); + } + + @Override + public String toString() { + return term.utf8ToString(); + } + + @Override + public void extractTerms(String field, Set terms) { + terms.add(new Term(field, term)); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index 79dd976b7789..f1f44154f554 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -48,7 +48,7 @@ final class TermWeight extends Weight { private final ScoreMode scoreMode; public TermWeight(IndexSearcher searcher, ScoreMode scoreMode, - float boost, TermStates termStates) throws IOException { + float boost, TermStates termStates) throws IOException { super(TermQuery.this); if (scoreMode.needsScores() && termStates == null) { throw new IllegalStateException("termStates are required when scores are needed"); @@ -98,7 +98,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { .getIndexOptions(); float maxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), maxFreq); - return new TermScorer(this, getTerm().field(), termsEnum, scoreMode, scorer); + return new TermScorer(this, termsEnum, scoreMode, scorer); } private long getMaxFreq(IndexOptions indexOptions, long ttf, long df) { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 79f00d30cb57..d51626fda8c1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -27,13 +27,11 @@ /** Expert: A Scorer for documents matching a Term. 
*/ final class TermScorer extends Scorer { - private final PostingsEnum postingsEnum; private final ImpactsEnum impactsEnum; private final DocIdSetIterator iterator; private final LeafSimScorer docScorer; private float minCompetitiveScore; - private final String field; /** * Construct a TermScorer. @@ -45,12 +43,11 @@ final class TermScorer extends Scorer { * @param docScorer * A {@link LeafSimScorer} for the appropriate field. */ - TermScorer(Weight weight, String field, TermsEnum te, ScoreMode scoreMode, LeafSimScorer docScorer) throws IOException { + TermScorer(Weight weight, TermsEnum te, ScoreMode scoreMode, LeafSimScorer docScorer) throws IOException { super(weight); this.docScorer = docScorer; - this.field = field; if (scoreMode == ScoreMode.TOP_SCORES) { - impactsEnum = te.impacts(docScorer.getSimScorer(), scoreMode.minRequiredPostings()); + impactsEnum = te.impacts(docScorer.getSimScorer(), PostingsEnum.FREQS); postingsEnum = impactsEnum; iterator = new DocIdSetIterator() { @@ -107,7 +104,7 @@ public long cost() { } }; } else { - postingsEnum = te.postings(null, scoreMode.minRequiredPostings()); + postingsEnum = te.postings(null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE); impactsEnum = new SlowImpactsEnum(postingsEnum, docScorer.getSimScorer().score(Float.MAX_VALUE, 1)); iterator = postingsEnum; } @@ -127,14 +124,6 @@ public DocIdSetIterator iterator() { return iterator; } - @Override - public IntervalIterator intervals(String field) { - if (this.field.equals(field)) { - return new TermIntervalIterator(postingsEnum); - } - return null; - } - @Override public float score() throws IOException { assert docID() != DocIdSetIterator.NO_MORE_DOCS; @@ -159,56 +148,4 @@ public void setMinCompetitiveScore(float minScore) { /** Returns a string representation of this TermScorer. 
*/ @Override public String toString() { return "scorer(" + weight + ")[" + super.toString() + "]"; } - - private static class TermIntervalIterator implements IntervalIterator { - - public TermIntervalIterator(PostingsEnum pe) { - this.pe = pe; - } - - private final PostingsEnum pe; - - int upTo = -1; - int pos = -1; - - @Override - public int start() { - return pos; - } - - @Override - public int end() { - return pos; - } - - @Override - public int innerWidth() { - return 0; - } - - @Override - public boolean reset(int doc) throws IOException { - if (pe.docID() == doc) { - upTo = pe.freq(); - pos = -1; - return true; - } - upTo = -1; - return false; - } - - @Override - public int nextInterval() throws IOException { - if (upTo <= 0) { - return pos = NO_MORE_INTERVALS; - } - upTo--; - return pos = pe.nextPosition(); - } - - @Override - public String toString() { - return pe.docID() + "[" + pos + "]"; - } - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java index f4ef706fcf1d..f7a88f15927e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java @@ -440,11 +440,6 @@ private void advanceAllTail() throws IOException { assert ensureConsistent(); } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } - @Override public float score() throws IOException { // we need to know about all matches diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 3106e1962a59..7853ccf2465b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -23,7 +23,6 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReader; import 
org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.util.Bits; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java index 2ec0c5d2b372..666f163742a3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -21,7 +21,6 @@ import java.util.Objects; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IntervalIterator; import org.apache.lucene.search.LeafSimScorer; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TwoPhaseIterator; @@ -32,7 +31,6 @@ */ public class SpanScorer extends Scorer { - protected final String field; protected final Spans spans; protected final LeafSimScorer docScorer; @@ -43,11 +41,10 @@ public class SpanScorer extends Scorer { private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for /** Sole constructor. 
*/ - public SpanScorer(SpanWeight weight, String field, Spans spans, LeafSimScorer docScorer) { + public SpanScorer(SpanWeight weight, Spans spans, LeafSimScorer docScorer) { super(weight); this.spans = Objects.requireNonNull(spans); this.docScorer = docScorer; - this.field = field; } /** return the Spans for this Scorer **/ @@ -60,13 +57,6 @@ public int docID() { return spans.docID(); } - @Override - public IntervalIterator intervals(String field) { - if (this.field.equals(field)) - return new SpanIntervalIterator(); - return null; - } - @Override public DocIdSetIterator iterator() { return spans; @@ -156,32 +146,4 @@ final float sloppyFreq() throws IOException { return freq; } - private class SpanIntervalIterator implements IntervalIterator { - - @Override - public int start() { - return spans.startPosition(); - } - - @Override - public int end() { - return spans.endPosition() - 1; - } - - @Override - public int innerWidth() { - return spans.width(); - } - - @Override - public boolean reset(int doc) throws IOException { - return spans.docID() == doc; - } - - @Override - public int nextInterval() throws IOException { - return spans.nextStartPosition(); - } - } - } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index f19ca742076f..25b58fdc39a0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -130,7 +130,7 @@ public SpanScorer scorer(LeafReaderContext context) throws IOException { return null; } final LeafSimScorer docScorer = getSimScorer(context); - return new SpanScorer(this, field, spans, docScorer); + return new SpanScorer(this, spans, docScorer); } /** diff --git a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java index f80bd5d82d63..1657f9b9ced1 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java +++ b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -189,11 +189,6 @@ public int docID() { public DocIdSetIterator iterator() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } - - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } } static final class JustCompileSimilarity extends Similarity { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java index 21b2ea3285f2..3118fa85394c 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java @@ -59,11 +59,6 @@ public DocIdSetIterator iterator() { return it; } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } - @Override public String toString() { return "FakeScorer(cost=" + it.cost() + ")"; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java index 3933b07e02c8..12136b5b318a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java @@ -44,11 +44,6 @@ private MockScorer() { public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); } - - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } } private static class NoOpCollector extends SimpleCollector { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java index f105216baae5..083ac248df91 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java @@ -150,11 +150,6 @@ public float score() throws IOException { public float getMaxScore(int upTo) throws IOException { return 0; } - - @Override - public IntervalIterator intervals(String field) { - return null; - } }; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 535d31dd0fa2..33fd8c18065f 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -73,7 +73,7 @@ private void checkHits(Query query, int[] results) throws IOException { public void testScoring() throws IOException { PhraseQuery pq = new PhraseQuery.Builder().add(new Term(field, "w2")).add(new Term(field, "w3")).build(); - Query equiv = Intervals.orderedQuery(field, 0, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))); + Query equiv = new IntervalQuery(field, Intervals.phrase("w2", "w3")); TopDocs td1 = searcher.search(pq, 10); TopDocs td2 = searcher.search(equiv, 10); @@ -85,100 +85,76 @@ public void testScoring() throws IOException { } public void testOrderedNearQueryWidth0() throws IOException { - checkHits(Intervals.orderedQuery(field, 0, new TermQuery(new Term(field, "w1")), - new TermQuery(new Term(field, "w2"))), + checkHits(new IntervalQuery(field, Intervals.orderedNear(0, Intervals.term("w1"), Intervals.term("w2"))), new int[]{0}); } public void testOrderedNearQueryWidth1() throws IOException { - checkHits(Intervals.orderedQuery(field, 1, new TermQuery(new Term(field, "w1")), - new TermQuery(new Term(field, "w2"))), + checkHits(new IntervalQuery(field, Intervals.orderedNear(1, Intervals.term("w1"), Intervals.term("w2"))), new int[]{0, 1, 2, 5}); } public void testOrderedNearQueryWidth2() throws IOException { 
- checkHits(Intervals.orderedQuery(field, 2, new TermQuery(new Term(field, "w1")), - new TermQuery(new Term(field, "w2"))), + checkHits(new IntervalQuery(field, Intervals.orderedNear(2, Intervals.term("w1"), Intervals.term("w2"))), new int[]{0, 1, 2, 3, 5}); } public void testNestedOrderedNearQuery() throws IOException { // onear/1(w1, onear/2(w2, w3)) - Query q = Intervals.orderedQuery(field, 1, - new TermQuery(new Term(field, "w1")), - Intervals.orderedQuery(field, 2, - new TermQuery(new Term(field, "w2")), - new TermQuery(new Term(field, "w3"))) - ); + Query q = new IntervalQuery(field, + Intervals.orderedNear(1, + Intervals.term("w1"), + Intervals.orderedNear(2, Intervals.term("w2"), Intervals.term("w3")))); checkHits(q, new int[]{0, 1, 2}); } - public void testNearPhraseQuery() throws IOException { - Query q = Intervals.unorderedQuery(field, - new PhraseQuery.Builder().add(new Term(field, "w3")).add(new Term(field, "w2")).build(), - new TermQuery(new Term(field, "w4"))); - checkHits(q, new int[]{ 5 }); - } - - public void testSloppyPhraseQuery() throws IOException { - Query q = Intervals.unorderedQuery(field, - new PhraseQuery.Builder().add(new Term(field, "w3")).add(new Term(field, "w2")).setSlop(2).build(), - new TermQuery(new Term(field, "w4"))); - checkHits(q, new int[]{ 0, 5 }); - } - public void testUnorderedQuery() throws IOException { - Query q = Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))); + Query q = new IntervalQuery(field, Intervals.unordered(Intervals.term("w1"), Intervals.term("w3"))); checkHits(q, new int[]{0, 1, 2, 3, 5}); } public void testNonOverlappingQuery() throws IOException { - Query q = Intervals.nonOverlappingQuery(field, - Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))), - Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w4")))); - + Query q = new 
IntervalQuery(field, Intervals.nonOverlapping( + Intervals.unordered(Intervals.term("w1"), Intervals.term("w3")), + Intervals.unordered(Intervals.term("w2"), Intervals.term("w4")))); checkHits(q, new int[]{1, 3, 5}); } public void testNotWithinQuery() throws IOException { - Query q = Intervals.notWithinQuery(field, new TermQuery(new Term(field, "w1")), 1, - new TermQuery(new Term(field, "w2"))); + Query q = new IntervalQuery(field, Intervals.notWithin(Intervals.term("w1"), 1, Intervals.term("w2"))); checkHits(q, new int[]{ 1, 2, 3 }); } public void testNotContainingQuery() throws IOException { - Query q = Intervals.notContainingQuery(field, - Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), - new TermQuery(new Term(field, "w3"))); - + Query q = new IntervalQuery(field, Intervals.notContaining( + Intervals.unordered(Intervals.term("w1"), Intervals.term("w2")), + Intervals.term("w3") + )); checkHits(q, new int[]{ 0, 2, 4, 5 }); } public void testContainingQuery() throws IOException { - Query q = Intervals.containingQuery(field, - Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), - new TermQuery(new Term(field, "w3"))); - + Query q = new IntervalQuery(field, Intervals.containing( + Intervals.unordered(Intervals.term("w1"), Intervals.term("w2")), + Intervals.term("w3") + )); checkHits(q, new int[]{ 1, 3, 5 }); } public void testContainedByQuery() throws IOException { - Query q = Intervals.containedByQuery(field, - new TermQuery(new Term(field, "w3")), - Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2")))); + Query q = new IntervalQuery(field, Intervals.containedBy( + Intervals.term("w3"), + Intervals.unordered(Intervals.term("w1"), Intervals.term("w2")))); checkHits(q, new int[]{ 1, 3, 5 }); } public void testNotContainedByQuery() throws IOException { - Query q = 
Intervals.notContainedByQuery(field, - new TermQuery(new Term(field, "w2")), - Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w4")))); + Query q = new IntervalQuery(field, Intervals.notContainedBy( + Intervals.term("w2"), + Intervals.unordered(Intervals.term("w1"), Intervals.term("w4")) + )); checkHits(q, new int[]{ 1, 3, 4, 5 }); } - // contained-by - // not-contained-by - - // TODO: Overlapping } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 339b4f657f62..5f5222ea41d6 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -84,22 +84,18 @@ public static void teardownIndex() throws IOException { IOUtils.close(searcher.getIndexReader(), directory); } - private void checkIntervals(Query query, String field, int expectedMatchCount, int[][] expected) throws IOException { - Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE_POSITIONS, 1f); + private void checkIntervals(IntervalsSource source, String field, int expectedMatchCount, int[][] expected) throws IOException { int matchedDocs = 0; for (LeafReaderContext ctx : searcher.leafContexts) { - Scorer scorer = weight.scorer(ctx); - if (scorer == null) - continue; - assertNull(scorer.intervals(field + "1")); + assertNull(source.intervals(field + "1", ctx)); NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); - IntervalIterator intervals = scorer.intervals(field); - DocIdSetIterator it = scorer.iterator(); - for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { - matchedDocs++; + IntervalIterator intervals = source.intervals(field, ctx); + if (intervals == null) + continue; + for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { ids.advance(doc); int id = (int) ids.longValue(); - if (intervals.reset(doc)) { + 
if (intervals.advanceTo(doc)) { int i = 0, pos; while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) { System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); @@ -109,6 +105,8 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i i += 2; } assertEquals(expected[id].length, i); + if (i > 0) + matchedDocs++; } else { assertEquals(0, expected[id].length); @@ -119,7 +117,7 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i } public void testTermQueryIntervals() throws IOException { - checkIntervals(new TermQuery(new Term("field1", "porridge")), "field1", 4, new int[][]{ + checkIntervals(Intervals.term("porridge"), "field1", 4, new int[][]{ {}, { 1, 1, 4, 4, 7, 7 }, { 1, 1, 4, 4, 7, 7 }, @@ -129,38 +127,8 @@ public void testTermQueryIntervals() throws IOException { }); } - public void testExactPhraseQueryIntervals() throws IOException { - checkIntervals(new PhraseQuery.Builder() - .add(new Term("field1", "pease")) - .add(new Term("field1", "porridge")).build(), "field1", 3, new int[][]{ - {}, - { 0, 1, 3, 4, 6, 7 }, - { 0, 1, 3, 4, 6, 7 }, - {}, - { 0, 1, 3, 4, 6, 7 }, - {} - }); - } - - public void testSloppyPhraseQueryIntervals() throws IOException { - checkIntervals(new PhraseQuery.Builder() - .add(new Term("field1", "pease")) - .add(new Term("field1", "porridge")) - .add(new Term("field1", "hot")) - .setSlop(3).build(), "field1", 3, new int[][]{ - {}, - { 0, 2, 1, 3, 2, 4 }, - { 0, 5, 3, 5, 3, 7, 5, 7 }, - {}, - { 0, 2, 1, 3, 2, 4 }, - {} - } - ); - } - public void testOrderedNearIntervals() throws IOException { - checkIntervals(Intervals.orderedQuery("field1", 100, - new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), + checkIntervals(Intervals.ordered(Intervals.term("pease"), Intervals.term("hot")), "field1", 3, new int[][]{ {}, { 0, 2, 6, 17 }, @@ -172,8 +140,7 @@ public void testOrderedNearIntervals() throws IOException 
{ } public void testUnorderedNearIntervals() throws IOException { - checkIntervals(Intervals.unorderedQuery("field1", 100, - new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), + checkIntervals(Intervals.unordered(Intervals.term("pease"), Intervals.term("hot")), "field1", 4, new int[][]{ {}, { 0, 2, 2, 3, 6, 17 }, @@ -185,10 +152,7 @@ public void testUnorderedNearIntervals() throws IOException { } public void testIntervalDisjunction() throws IOException { - checkIntervals(new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD) - .add(new TermQuery(new Term("field1", "hot")), BooleanClause.Occur.SHOULD) - .build(), "field1", 4, new int[][]{ + checkIntervals(Intervals.or(Intervals.term("pease"), Intervals.term("hot")), "field1", 4, new int[][]{ {}, { 0, 0, 2, 2, 3, 3, 6, 6, 17, 17}, { 0, 0, 3, 3, 5, 5, 6, 6, 21, 21}, @@ -199,13 +163,8 @@ public void testIntervalDisjunction() throws IOException { } public void testNesting() throws IOException { - checkIntervals(Intervals.unorderedQuery("field1", 100, - new TermQuery(new Term("field1", "pease")), - new TermQuery(new Term("field1", "porridge")), - new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "hot")), BooleanClause.Occur.SHOULD) - .add(new TermQuery(new Term("field1", "cold")), BooleanClause.Occur.SHOULD) - .build()), "field1", 3, new int[][]{ + checkIntervals(Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.or(Intervals.term("hot"), Intervals.term("cold"))), + "field1", 3, new int[][]{ {}, { 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 17 }, { 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 17 }, @@ -215,81 +174,4 @@ public void testNesting() throws IOException { }); } - // x near ((a not b) or (c not d)) - public void testExclusionBooleans() throws IOException { - checkIntervals(Intervals.unorderedQuery("field1", - new TermQuery(new Term("field1", "pease")), - new BooleanQuery.Builder() - 
.add(new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "nine")), BooleanClause.Occur.MUST) - .add(new TermQuery(new Term("field1", "years")), BooleanClause.Occur.MUST_NOT) - .build(), BooleanClause.Occur.SHOULD) - .add(new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "twelve")), BooleanClause.Occur.MUST) - .add(new TermQuery(new Term("field1", "days")), BooleanClause.Occur.MUST_NOT) - .build(), BooleanClause.Occur.SHOULD) - .build()), "field1", 2, new int[][]{ - {}, - { 6, 11 }, - {}, - {}, - { 6, 21 }, - {} - }); - } - - public void testConjunctionBooleans() throws IOException { - checkIntervals(Intervals.unorderedQuery("field1", - new TermQuery(new Term("field1", "pease")), - new BooleanQuery.Builder() - .add(new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "nine")), BooleanClause.Occur.MUST) - .add(new TermQuery(new Term("field2", "caverns")), BooleanClause.Occur.MUST) - .build(), BooleanClause.Occur.SHOULD) - .add(new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "twelve")), BooleanClause.Occur.MUST) - .add(new TermQuery(new Term("field2", "sunless")), BooleanClause.Occur.MUST) - .build(), BooleanClause.Occur.SHOULD) - .build()), "field1", 2, new int[][]{ - {}, - { 6, 11 }, - { 6, 11 }, - {}, - {}, - {} - }); - } - - public void testMinimumShouldMatch() throws IOException { - checkIntervals(new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD) - .add(new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "porridge")), BooleanClause.Occur.SHOULD) - .add(new TermQuery(new Term("field1", "days")), BooleanClause.Occur.SHOULD) - .add(new TermQuery(new Term("field1", "fraggle")), BooleanClause.Occur.SHOULD) - .setMinimumNumberShouldMatch(2) - .build(), BooleanClause.Occur.SHOULD) - .build(), "field1", 4, new int[][]{ - {}, - { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 12, 12, 29, 29 }, - { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 12, 12, 27, 27 
}, - { 7, 7 }, - { 0, 0, 3, 3, 6, 6 }, - {} - }); - } - - public void testSpanNearQueryEquivalence() throws IOException { - checkIntervals(new SpanNearQuery(new SpanQuery[]{ - new SpanTermQuery(new Term("field1", "pease")), - new SpanTermQuery(new Term("field1", "hot"))}, 100, true), - "field1", 3, new int[][]{ - {}, - {0, 2, 3, 17, 6, 17}, - {0, 5, 3, 5, 6, 21}, - {}, - { 0, 2, 3, 17, 6, 17 }, - { } - }); - } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java index 0f90b1c18a94..f60435c57a30 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java @@ -402,10 +402,5 @@ public int docID() { } }; } - - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java index 81855bb4dc84..9fbd6a46b56f 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java @@ -69,11 +69,6 @@ public long cost() { } }; } - - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } } // The scores must have positive as well as negative values diff --git a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java index eb46ab49e466..d1f307d063ec 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java @@ -437,11 +437,6 @@ public int docID() { return docID; } - @Override - public IntervalIterator 
intervals(String field) { - throw new UnsupportedOperationException(); - } - @Override public DocIdSetIterator iterator() { return new DocIdSetIterator() { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java index 900267166894..257310176740 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java @@ -50,11 +50,6 @@ public float getMaxScore(int upTo) throws IOException { @Override public int docID() { return doc; } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } - @Override public DocIdSetIterator iterator() { return new DocIdSetIterator() { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java index c0f6b2401cb1..17c5f85dd898 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java @@ -245,11 +245,6 @@ public float getMaxScore(int upTo) throws IOException { public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); } - - @Override - public IntervalIterator intervals(String field) { - return null; - } } public void testSetMinCompetitiveScore() throws Exception { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java index 6600b6ee92e4..f12e9100d656 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java @@ -264,12 +264,7 @@ public int docID() { public DocIdSetIterator iterator() { return scorer.iterator(); } - - 
@Override - public IntervalIterator intervals(String field) { - return scorer.intervals(field); - } - + }; super.setScorer(s); } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java index 5b5d5812559b..bd5d927c6275 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java @@ -151,7 +151,7 @@ public SpanScorer scorer(LeafReaderContext context) throws IOException { return null; LeafSimScorer docScorer = innerWeight.getSimScorer(context); PayloadSpans payloadSpans = new PayloadSpans(spans, decoder); - return new PayloadSpanScorer(this, field, payloadSpans, docScorer); + return new PayloadSpanScorer(this, payloadSpans, docScorer); } @Override @@ -227,8 +227,8 @@ private class PayloadSpanScorer extends SpanScorer { private final PayloadSpans spans; - private PayloadSpanScorer(SpanWeight weight, String field, PayloadSpans spans, LeafSimScorer docScorer) throws IOException { - super(weight, field, spans, docScorer); + private PayloadSpanScorer(SpanWeight weight, PayloadSpans spans, LeafSimScorer docScorer) throws IOException { + super(weight, spans, docScorer); this.spans = spans; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java index dbee623c59b6..a9d3bfb2da9a 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java @@ -128,7 +128,7 @@ public SpanScorer scorer(LeafReaderContext context) throws IOException { return null; } final LeafSimScorer docScorer = getSimScorer(context); - return new SpanScorer(this, field, spans, docScorer); + return new 
SpanScorer(this, spans, docScorer); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java index e6eeae907da8..80cd4da7cf0d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java @@ -112,11 +112,6 @@ public Collection getChildren() { return Collections.singletonList(new ChildScorer(in, "SHOULD")); } - @Override - public IntervalIterator intervals(String field) { - return in.intervals(field); - } - @Override public int docID() { return in.docID(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java index 4b982bb45a8d..3b9a740a448f 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java @@ -196,10 +196,6 @@ public float getMaxScore(int upTo) throws IOException { return max; } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } }; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java index a8ef239d93cb..9206b0484d4d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java @@ -115,9 +115,4 @@ public long cost() { }; } - @Override - public IntervalIterator intervals(String field) { - return null; - } - } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java 
b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java index fcb48a8d0e84..a050b50401cb 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java @@ -108,11 +108,6 @@ public float score() throws IOException { return scorer.score(); } - @Override - public IntervalIterator intervals(String field) { - return scorer.intervals(field); - } - @Override public int advanceShallow(int target) throws IOException { return scorer.advanceShallow(target); From 9ec7abad31b81972128c1381ce7a5d1da7634751 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 6 Mar 2018 18:16:43 +0000 Subject: [PATCH 23/83] Fix nested disjunctions (LUCENE-7398) --- .../search/ConjunctionIntervalIterator.java | 4 + .../search/DisjunctionIntervalsSource.java | 90 ++++++++++++++++++- .../apache/lucene/search/IntervalFilter.java | 5 ++ .../lucene/search/IntervalFunction.java | 5 +- .../lucene/search/TestIntervalQuery.java | 12 ++- 5 files changed, 110 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java index 53139fe42ec8..f6457d2ffd75 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java @@ -63,4 +63,8 @@ public final float cost() { return cost; } + @Override + public String toString() { + return approximation.docID() + ":[" + start() + "->" + end() + "]"; + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java index 053ddd4a02f5..025bba1cdb19 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -90,7 +90,10 @@ private static class DisjunctionIntervalIterator implements IntervalIterator { this.intervalQueue = new PriorityQueue(iterators.size()) { @Override protected boolean lessThan(IntervalIterator a, IntervalIterator b) { - return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); + // This is different to the Vigna paper, because we're interested in matching rather + // than in minimizing intervals, so a wider interval should sort before its prefixes + return a.start() < b.start() || (a.start() == b.start() && a.end() > b.end()); + //return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); } }; this.disiQueue = new DisiPriorityQueue(iterators.size()); @@ -142,13 +145,13 @@ public boolean advanceTo(int doc) throws IOException { intervalQueue.add(it); } } - current = null; + current = UNPOSITIONED; return intervalQueue.size() > 0; } @Override public int nextInterval() throws IOException { - if (current == null) { + if (current == UNPOSITIONED) { current = intervalQueue.top(); return current.start(); } @@ -160,16 +163,95 @@ public int nextInterval() throws IOException { } } if (intervalQueue.size() == 0) { - current = null; + current = EMPTY; return IntervalIterator.NO_MORE_INTERVALS; } current = intervalQueue.top(); return current.start(); } + @Override + public String toString() { + return approximation.docID() + ":[" + start() + "->" + end() + "]"; + } + private boolean contains(IntervalIterator it, int start, int end) { return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); } } + + private static final IntervalIterator EMPTY = new IntervalIterator() { + @Override + public DocIdSetIterator approximation() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean advanceTo(int doc) throws IOException { + return false; + } + + @Override + public int start() { + return 
NO_MORE_INTERVALS; + } + + @Override + public int end() { + return NO_MORE_INTERVALS; + } + + @Override + public int innerWidth() { + throw new UnsupportedOperationException(); + } + + @Override + public int nextInterval() throws IOException { + return NO_MORE_INTERVALS; + } + + @Override + public float cost() { + return 0; + } + }; + + private static final IntervalIterator UNPOSITIONED = new IntervalIterator() { + @Override + public DocIdSetIterator approximation() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean advanceTo(int doc) throws IOException { + return false; + } + + @Override + public int start() { + return -1; + } + + @Override + public int end() { + return -1; + } + + @Override + public int innerWidth() { + throw new UnsupportedOperationException(); + } + + @Override + public int nextInterval() throws IOException { + return NO_MORE_INTERVALS; + } + + @Override + public float cost() { + return 0; + } + }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index b968f88ffd96..e903ceb75223 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -49,6 +49,11 @@ protected boolean accept() { int width = innerWidth(); return width >= minWidth && width <= maxWidth; } + + @Override + public String toString() { + return "widthfilter(" + minWidth + "," + maxWidth + "," + in.toString() + ")"; + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index ff2a338bae8a..384e4ac0b691 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -157,7 +157,10 @@ public int nextInterval() throws IOException { start = subIterators.get(0).start(); 
end = subIterators.get(subIterators.size() - 1).end(); b = subIterators.get(subIterators.size() - 1).start(); - innerWidth = b - subIterators.get(0).end() - 1; + innerWidth = 0; + for (int j = 1; j < subIterators.size(); j++) { + innerWidth += subIterators.get(j).start() - subIterators.get(j - 1).end() - 1; + } i = 1; if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) return start; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 33fd8c18065f..e2ab2af7c972 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -64,7 +64,9 @@ public void tearDown() throws Exception { "w1 xx w2 w4 yy w3", "w1 w3 xx w2 yy w3", "w2 w1", - "w2 w1 w3 w2 w4" + "w2 w1 w3 w2 w4", + "coordinate genome mapping research", + "coordinate genome research" }; private void checkHits(Query query, int[] results) throws IOException { @@ -157,4 +159,12 @@ public void testNotContainedByQuery() throws IOException { checkHits(q, new int[]{ 1, 3, 4, 5 }); } + public void testNestedOr() throws IOException { + Query q = new IntervalQuery(field, Intervals.orderedNear(0, + Intervals.term("coordinate"), + Intervals.or(Intervals.phrase("genome", "mapping"), Intervals.term("genome")), + Intervals.term("research"))); + checkHits(q, new int[]{ 6, 7 }); + } + } From a7bf7c3ab1a982f965db7b15a12b1a5883e2967f Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 7 Mar 2018 03:18:32 +0000 Subject: [PATCH 24/83] Remove slop/innerwidth, add BLOCK and MAXWIDTH --- .../search/DifferenceIntervalFunction.java | 9 - .../search/DisjunctionIntervalsSource.java | 15 -- .../lucene/search/FilterIntervalIterator.java | 5 - .../apache/lucene/search/IntervalFilter.java | 33 ---- .../lucene/search/IntervalFunction.java | 172 ++++++------------ .../lucene/search/IntervalIterator.java | 7 +- 
.../apache/lucene/search/IntervalQuery.java | 5 +- .../org/apache/lucene/search/Intervals.java | 68 ++----- .../apache/lucene/search/IntervalsSource.java | 2 + .../lucene/search/LowpassIntervalsSource.java | 71 ++++++++ .../lucene/search/TermIntervalsSource.java | 10 +- .../lucene/search/TestIntervalQuery.java | 20 +- 12 files changed, 156 insertions(+), 261 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/LowpassIntervalsSource.java diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index 2ee7d36d78ac..e5310f9482fd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -95,11 +95,6 @@ public int end() { return a.end(); } - @Override - public int innerWidth() { - return a.innerWidth(); - } - @Override public boolean advanceTo(int doc) throws IOException { bpos = b.advanceTo(doc); @@ -191,10 +186,6 @@ public int end() { return newEnd; } - @Override - public int innerWidth() { - throw new UnsupportedOperationException(); - } }; return NON_OVERLAPPING.apply(minuend, notWithin); } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java index 025bba1cdb19..1327c131d894 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -126,11 +126,6 @@ public int end() { return current.end(); } - @Override - public int innerWidth() { - return current.innerWidth(); - } - @Override public boolean advanceTo(int doc) throws IOException { intervalQueue.clear(); @@ -202,11 +197,6 @@ public int end() { return NO_MORE_INTERVALS; } - @Override - public int innerWidth() { - throw new 
UnsupportedOperationException(); - } - @Override public int nextInterval() throws IOException { return NO_MORE_INTERVALS; @@ -239,11 +229,6 @@ public int end() { return -1; } - @Override - public int innerWidth() { - throw new UnsupportedOperationException(); - } - @Override public int nextInterval() throws IOException { return NO_MORE_INTERVALS; diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java index 358aee4997ac..bb4d736fe3ec 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java @@ -37,11 +37,6 @@ public int end() { return in.end(); } - @Override - public int innerWidth() { - return in.innerWidth(); - } - @Override public DocIdSetIterator approximation() { return in.approximation(); diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index e903ceb75223..aa074039bdd5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -24,39 +24,6 @@ */ public abstract class IntervalFilter extends FilterIntervalIterator { - /** - * Filter an {@link IntervalIterator} by its outer width, ie the distance between the - * start and end of the iterator - */ - public static IntervalIterator widthFilter(IntervalIterator in, int minWidth, int maxWidth) { - return new IntervalFilter(in) { - @Override - protected boolean accept() { - int width = end() - start(); - return width >= minWidth && width <= maxWidth; - } - }; - } - - /** - * Filter an {@link IntervalIterator} by its inner width, ie the distance between the - * end of its first subiterator and the beginning of its last - */ - public static IntervalIterator innerWidthFilter(IntervalIterator in, int minWidth, int 
maxWidth) { - return new IntervalFilter(in) { - @Override - protected boolean accept() { - int width = innerWidth(); - return width >= minWidth && width <= maxWidth; - } - - @Override - public String toString() { - return "widthfilter(" + minWidth + "," + maxWidth + "," + in.toString() + ")"; - } - }; - } - /** * Create a new filter */ diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 384e4ac0b691..860815ab99c4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -42,73 +42,80 @@ public abstract class IntervalFunction { */ public abstract IntervalIterator apply(List iterators); - /** - * Return an iterator over intervals where the subiterators appear in a given order - */ - public static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { + public static final IntervalFunction BLOCK = new SingletonFunction("BLOCK") { @Override - public IntervalIterator apply(List intervalIterators) { - return orderedIntervalIterator(intervalIterators); + public IntervalIterator apply(List iterators) { + return new BlockIntervalIterator(iterators); } }; - /** - * Return an iterator over intervals where the subiterators appear in a given order, - * filtered by width - */ - public static class OrderedNearFunction extends IntervalFunction { - - /** - * Create a new OrderedNearFunction - * @param minWidth the minimum width of returned intervals - * @param maxWidth the maximum width of returned intervals - */ - public OrderedNearFunction(int minWidth, int maxWidth) { - this.minWidth = minWidth; - this.maxWidth = maxWidth; + private static class BlockIntervalIterator extends ConjunctionIntervalIterator { + + int start, end; + + BlockIntervalIterator(List subIterators) { + super(subIterators); } - final int minWidth; - final int maxWidth; + @Override + protected 
void reset() throws IOException { + start = end = -1; + } @Override - public IntervalIterator apply(List intervalIterators) { - return IntervalFilter.innerWidthFilter(orderedIntervalIterator(intervalIterators), minWidth, maxWidth); + public int start() { + return start; } @Override - public String toString() { - return "ONEAR[" + minWidth + "/" + maxWidth + "]"; + public int end() { + return end; } @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - OrderedNearFunction that = (OrderedNearFunction) o; - return minWidth == that.minWidth && - maxWidth == that.maxWidth; + public float score() { + return 1; } @Override - public int hashCode() { - return Objects.hash(minWidth, maxWidth); + public int nextInterval() throws IOException { + if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) + return NO_MORE_INTERVALS; + int i = 1; + while (i < subIterators.size()) { + while (subIterators.get(i).start() <= subIterators.get(i - 1).end()) { + if (subIterators.get(i).nextInterval() == NO_MORE_INTERVALS) + return NO_MORE_INTERVALS; + } + if (subIterators.get(i).start() == subIterators.get(i - 1).end() + 1) { + i = i + 1; + } + else { + if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) + return NO_MORE_INTERVALS; + i = 1; + } + } + start = subIterators.get(0).start(); + end = subIterators.get(subIterators.size() - 1).end(); + return start; } } - private static IntervalIterator orderedIntervalIterator(List subIterators) { - for (IntervalIterator it : subIterators) { - if (it == null) - return null; + /** + * Return an iterator over intervals where the subiterators appear in a given order + */ + public static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { + @Override + public IntervalIterator apply(List intervalIterators) { + return new OrderedIntervalIterator(intervalIterators); } - return new OrderedIntervalIterator(subIterators); - } + }; private 
static class OrderedIntervalIterator extends ConjunctionIntervalIterator { int start; int end; - int innerWidth; int i; private OrderedIntervalIterator(List subIntervals) { @@ -125,16 +132,11 @@ public int end() { return end; } - @Override - public int innerWidth() { - return innerWidth; - } - @Override public void reset() throws IOException { subIterators.get(0).nextInterval(); i = 1; - start = end = innerWidth = Integer.MIN_VALUE; + start = end = Integer.MIN_VALUE; } @Override @@ -157,10 +159,6 @@ public int nextInterval() throws IOException { start = subIterators.get(0).start(); end = subIterators.get(subIterators.size() - 1).end(); b = subIterators.get(subIterators.size() - 1).start(); - innerWidth = 0; - for (int j = 1; j < subIterators.size(); j++) { - innerWidth += subIterators.get(j).start() - subIterators.get(j - 1).end() - 1; - } i = 1; if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) return start; @@ -174,64 +172,16 @@ public int nextInterval() throws IOException { public static final IntervalFunction UNORDERED = new SingletonFunction("UNORDERED") { @Override public IntervalIterator apply(List intervalIterators) { - return unorderedIntervalIterator(intervalIterators); + return new UnorderedIntervalIterator(intervalIterators); } }; - /** - * An iterator over intervals where the subiterators appear in any order, within a given width range - */ - public static class UnorderedNearFunction extends IntervalFunction { - - final int minWidth; - final int maxWidth; - - /** - * Create a new UnorderedNearFunction - * @param minWidth the minimum width of the returned intervals - * @param maxWidth the maximum width of the returned intervals - */ - public UnorderedNearFunction(int minWidth, int maxWidth) { - this.minWidth = minWidth; - this.maxWidth = maxWidth; - } - - @Override - public IntervalIterator apply(List intervalIterators) { - return IntervalFilter.innerWidthFilter(unorderedIntervalIterator(intervalIterators), minWidth, maxWidth); - } - - 
@Override - public String toString() { - return "ONEAR[" + minWidth + "/" + maxWidth + "]"; - } - - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - UnorderedNearFunction that = (UnorderedNearFunction) o; - return minWidth == that.minWidth && - maxWidth == that.maxWidth; - } - - @Override - public int hashCode() { - return Objects.hash(minWidth, maxWidth); - } - } - - private static IntervalIterator unorderedIntervalIterator(List subIntervals) { - return new UnorderedIntervalIterator(subIntervals); - } - private static class UnorderedIntervalIterator extends ConjunctionIntervalIterator { private final PriorityQueue queue; private final IntervalIterator[] subIterators; - int start, end, innerStart, innerEnd, queueEnd; + int start, end, queueEnd; UnorderedIntervalIterator(List subIterators) { super(subIterators); @@ -258,21 +208,15 @@ public int end() { return end; } - @Override - public int innerWidth() { - return innerEnd - innerStart + 1; - } - @Override public void reset() throws IOException { this.queue.clear(); - this.queueEnd = start = end = innerEnd = innerStart = -1; + this.queueEnd = start = end = -1; for (IntervalIterator subIterator : subIterators) { subIterator.nextInterval(); queue.add(subIterator); if (subIterator.end() > queueEnd) { queueEnd = subIterator.end(); - innerEnd = subIterator.start(); } } } @@ -281,7 +225,6 @@ void updateRightExtreme(IntervalIterator it) { int itEnd = it.end(); if (itEnd > queueEnd) { queueEnd = itEnd; - innerEnd = it.start(); } } @@ -298,7 +241,6 @@ public int nextInterval() throws IOException { return NO_MORE_INTERVALS; do { start = queue.top().start(); - innerStart = queue.top().end(); end = queueEnd; if (queue.top().end() == end) return start; @@ -337,11 +279,6 @@ public int end() { return a.end(); } - @Override - public int innerWidth() { - return a.innerWidth(); - } - @Override public void reset() { bpos = true; @@ -389,11 
+326,6 @@ public int end() { return a.end(); } - @Override - public int innerWidth() { - return a.innerWidth(); - } - @Override public void reset() throws IOException { bpos = true; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index d33a47530031..9dc8a0cd9ab4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -45,11 +45,6 @@ public interface IntervalIterator { */ int end(); - /** - * The width of the current interval - */ - int innerWidth(); - /** * Advance the iterator to the next interval * @@ -62,7 +57,7 @@ public interface IntervalIterator { * The score of the current interval */ default float score() { - return (float) (1.0 / (1 + innerWidth())); + return (float) (1.0 / (end() - start() + 1)); } float cost(); diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index 1130d327613a..83a81b062a24 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -76,8 +76,11 @@ private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, float boost) termStats[termUpTo++] = termStatistics; } } + if (termUpTo == 0) { + return null; + } CollectionStatistics collectionStats = searcher.collectionStatistics(field); - return searcher.getSimilarity().scorer(boost, collectionStats, termStats); + return searcher.getSimilarity().scorer(boost, collectionStats, Arrays.copyOf(termStats, termUpTo)); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index 175b50762316..75989dc152c8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -17,8 +17,12 @@ package org.apache.lucene.search; +import java.io.IOException; import java.util.Arrays; +import java.util.Set; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; import org.apache.lucene.util.BytesRef; /** @@ -48,7 +52,11 @@ public static IntervalsSource phrase(String... terms) { sources[i] = term(term); i++; } - return orderedNear(0, sources); + return phrase(sources); + } + + public static IntervalsSource phrase(IntervalsSource... subSources) { + return new ConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.BLOCK); } public static IntervalsSource or(IntervalsSource... subSources) { @@ -57,33 +65,8 @@ public static IntervalsSource or(IntervalsSource... subSources) { return new DisjunctionIntervalsSource(Arrays.asList(subSources)); } - /** - * Create an ordered {@link IntervalsSource} with a maximum width - * - * Returns intervals in which the subsources all appear in the given order, and - * in which the width of the interval over which the subsources appear is less than - * the defined width - * - * @param width the maximum width of subquery-spanning intervals that will match - * @param subSources an ordered set of {@link IntervalsSource} objects - */ - public static IntervalsSource orderedNear(int width, IntervalsSource... 
subSources) { - return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.OrderedNearFunction(0, width)); - } - - /** - * Create an ordered {@link IntervalsSource} with a defined width range - * - * Returns intervals in which the subsources all appear in the given order, and in - * which the width of the interval over which the subsources appear is between the - * minimum and maximum defined widths - * - * @param minWidth the minimum width of subquery-spanning intervals that will match - * @param maxWidth the maximum width of subquery-spanning intervals that will match - * @param subSources an ordered set of {@link IntervalsSource} objects - */ - public static IntervalsSource orderedNear(int minWidth, int maxWidth, IntervalsSource... subSources) { - return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.OrderedNearFunction(minWidth, maxWidth)); + public static IntervalsSource maxwidth(int width, IntervalsSource subSource) { + return new LowpassIntervalsSource(subSource, width); } /** @@ -97,35 +80,6 @@ public static IntervalsSource ordered(IntervalsSource... subSources) { return new ConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.ORDERED); } - /** - * Create an unordered {@link IntervalsSource} with a maximum width - * - * Returns intervals in which the subsources all appear in any order, and in which - * the width of the interval over which the subsources appear is less than the - * defined width - * - * @param width the maximum width of subquery-spanning intervals that will match - * @param subSources an unordered set of queries - */ - public static IntervalsSource unorderedNear(int width, IntervalsSource... 
subSources) { - return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.UnorderedNearFunction(0, width)); - } - - /** - * Create an unordered {@link IntervalsSource} with a defined width range - * - * Returns intervals in which the subsources all appear in any order, and in which - * the width of the interval over which the subsources appear is between the minimum - * and maximum defined widths - * - * @param minWidth the minimum width of subquery-spanning intervals that will match - * @param maxWidth the maximum width of subquery-spanning intervals that will match - * @param subSources an unordered set of subsources - */ - public static IntervalsSource unorderedNear(int minWidth, int maxWidth, IntervalsSource... subSources) { - return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.UnorderedNearFunction(minWidth, maxWidth)); - } - /** * Create an unordered {@link IntervalsSource} with an unbounded width range * diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java index 3bdf1e50655d..bb0362171c5a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java @@ -18,6 +18,7 @@ package org.apache.lucene.search; import java.io.IOException; +import java.util.Objects; import java.util.Set; import org.apache.lucene.index.LeafReaderContext; @@ -37,4 +38,5 @@ public abstract class IntervalsSource { public abstract String toString(); public abstract void extractTerms(String field, Set terms); + } diff --git a/lucene/core/src/java/org/apache/lucene/search/LowpassIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/LowpassIntervalsSource.java new file mode 100644 index 000000000000..39f24fbfb670 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/LowpassIntervalsSource.java @@ -0,0 +1,71 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Objects; +import java.util.Set; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; + +class LowpassIntervalsSource extends IntervalsSource { + + final IntervalsSource in; + final int maxWidth; + + LowpassIntervalsSource(IntervalsSource in, int maxWidth) { + this.in = in; + this.maxWidth = maxWidth; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + LowpassIntervalsSource that = (LowpassIntervalsSource) o; + return maxWidth == that.maxWidth && + Objects.equals(in, that.in); + } + + @Override + public String toString() { + return "MAXWIDTH/" + maxWidth + "(" + in + ")"; + } + + @Override + public void extractTerms(String field, Set terms) { + in.extractTerms(field, terms); + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + IntervalIterator i = in.intervals(field, ctx); + return new IntervalFilter(i) { + @Override + protected boolean accept() { + return (i.end() - i.start()) + 1 <= maxWidth; + } + }; 
+ } + + @Override + public int hashCode() { + return Objects.hash(in, maxWidth); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java index c60af1273b3f..4f7650e0df3d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -77,11 +77,6 @@ public int end() { return pos; } - @Override - public int innerWidth() { - return 1; - } - @Override public int nextInterval() throws IOException { if (upto <= 0) @@ -90,6 +85,11 @@ public int nextInterval() throws IOException { return pos = pe.nextPosition(); } + @Override + public float score() { + return 1; + } + @Override public float cost() { return cost; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index e2ab2af7c972..6aa4f833af05 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -86,29 +86,29 @@ public void testScoring() throws IOException { } } - public void testOrderedNearQueryWidth0() throws IOException { - checkHits(new IntervalQuery(field, Intervals.orderedNear(0, Intervals.term("w1"), Intervals.term("w2"))), + public void testPhraseQuery() throws IOException { + checkHits(new IntervalQuery(field, Intervals.phrase(Intervals.term("w1"), Intervals.term("w2"))), new int[]{0}); } - public void testOrderedNearQueryWidth1() throws IOException { - checkHits(new IntervalQuery(field, Intervals.orderedNear(1, Intervals.term("w1"), Intervals.term("w2"))), + public void testOrderedNearQueryWidth3() throws IOException { + checkHits(new IntervalQuery(field, Intervals.maxwidth(3, Intervals.ordered(Intervals.term("w1"), Intervals.term("w2")))), new int[]{0, 1, 2, 5}); } - public void 
testOrderedNearQueryWidth2() throws IOException { - checkHits(new IntervalQuery(field, Intervals.orderedNear(2, Intervals.term("w1"), Intervals.term("w2"))), + public void testOrderedNearQueryWidth4() throws IOException { + checkHits(new IntervalQuery(field, Intervals.maxwidth(4, Intervals.ordered(Intervals.term("w1"), Intervals.term("w2")))), new int[]{0, 1, 2, 3, 5}); } public void testNestedOrderedNearQuery() throws IOException { // onear/1(w1, onear/2(w2, w3)) Query q = new IntervalQuery(field, - Intervals.orderedNear(1, + Intervals.ordered( Intervals.term("w1"), - Intervals.orderedNear(2, Intervals.term("w2"), Intervals.term("w3")))); + Intervals.maxwidth(3, Intervals.ordered(Intervals.term("w2"), Intervals.term("w3"))))); - checkHits(q, new int[]{0, 1, 2}); + checkHits(q, new int[]{0, 1, 3}); } public void testUnorderedQuery() throws IOException { @@ -160,7 +160,7 @@ public void testNotContainedByQuery() throws IOException { } public void testNestedOr() throws IOException { - Query q = new IntervalQuery(field, Intervals.orderedNear(0, + Query q = new IntervalQuery(field, Intervals.phrase( Intervals.term("coordinate"), Intervals.or(Intervals.phrase("genome", "mapping"), Intervals.term("genome")), Intervals.term("research"))); From 60601861fafcdbe5624148525e4a8b0e5eba0c99 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 7 Mar 2018 16:04:53 +0000 Subject: [PATCH 25/83] javadocs --- .../org/apache/lucene/search/Intervals.java | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index 75989dc152c8..84022db73cc5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -26,7 +26,7 @@ import org.apache.lucene.util.BytesRef; /** - * Constructor functions for interval-based queries + * Constructor functions for {@link 
IntervalsSource} types * * These queries use {@link IntervalFunction} or {@link DifferenceIntervalFunction} * classes, implementing minimum-interval algorithms taken from the paper @@ -37,14 +37,23 @@ public final class Intervals { private Intervals() {} + /** + * Return an {@link IntervalsSource} exposing intervals for a term + */ public static IntervalsSource term(BytesRef term) { return new TermIntervalsSource(term); } + /** + * Return an {@link IntervalsSource} exposing intervals for a term + */ public static IntervalsSource term(String term) { return new TermIntervalsSource(new BytesRef(term)); } + /** + * Return an {@link IntervalsSource} exposing intervals for a phrase consisting of a list of terms + */ public static IntervalsSource phrase(String... terms) { IntervalsSource[] sources = new IntervalsSource[terms.length]; int i = 0; @@ -55,16 +64,27 @@ public static IntervalsSource phrase(String... terms) { return phrase(sources); } + /** + * Return an {@link IntervalsSource} exposing intervals for a phrase consisting of a list of IntervalsSources + */ public static IntervalsSource phrase(IntervalsSource... subSources) { return new ConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.BLOCK); } + /** + * Return an {@link IntervalsSource} over the disjunction of a set of sub-sources + */ public static IntervalsSource or(IntervalsSource... 
subSources) { if (subSources.length == 1) return subSources[0]; return new DisjunctionIntervalsSource(Arrays.asList(subSources)); } + /** + * Create an {@link IntervalsSource} that filters a sub-source by the width of its intervals + * @param width the maximum width of intervals in the sub-source to return + * @param subSource the sub-source to filter + */ public static IntervalsSource maxwidth(int width, IntervalsSource subSource) { return new LowpassIntervalsSource(subSource, width); } From 4990685f4898752b885794c6b17abd4796a56201 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 7 Mar 2018 16:12:24 +0000 Subject: [PATCH 26/83] Field masking IntervalsSource --- .../org/apache/lucene/search/Intervals.java | 45 +++++++++++++++++++ .../apache/lucene/search/TestIntervals.java | 15 ++++++- 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index 84022db73cc5..5f15df2b94d1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.Arrays; +import java.util.Objects; import java.util.Set; import org.apache.lucene.index.LeafReaderContext; @@ -188,6 +189,50 @@ public static IntervalsSource containedBy(IntervalsSource small, IntervalsSource return new ConjunctionIntervalsSource(Arrays.asList(small, big), IntervalFunction.CONTAINED_BY); } + public static IntervalsSource mask(String field, IntervalsSource in) { + return new FieldMaskIntervalsSource(field, in); + } + + private static class FieldMaskIntervalsSource extends IntervalsSource { + + final String field; + final IntervalsSource in; + + private FieldMaskIntervalsSource(String field, IntervalsSource in) { + this.field = field; + this.in = in; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == 
null || getClass() != o.getClass()) return false; + FieldMaskIntervalsSource that = (FieldMaskIntervalsSource) o; + return Objects.equals(field, that.field) && + Objects.equals(in, that.in); + } + + @Override + public String toString() { + return "XFIELD/" + field + "(" + in + ")"; + } + + @Override + public void extractTerms(String field, Set terms) { + in.extractTerms(field, terms); + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + return in.intervals(this.field, ctx); + } + + @Override + public int hashCode() { + return Objects.hash(field, in); + } + } + // TODO: beforeQuery, afterQuery, arbitrary IntervalFunctions } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 5f5222ea41d6..0ee7defa3db7 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -87,7 +87,7 @@ public static void teardownIndex() throws IOException { private void checkIntervals(IntervalsSource source, String field, int expectedMatchCount, int[][] expected) throws IOException { int matchedDocs = 0; for (LeafReaderContext ctx : searcher.leafContexts) { - assertNull(source.intervals(field + "1", ctx)); + // assertNull(source.intervals(field + "1", ctx)); NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); IntervalIterator intervals = source.intervals(field, ctx); if (intervals == null) @@ -174,4 +174,17 @@ public void testNesting() throws IOException { }); } + public void testCrossFieldMasking() throws IOException { + checkIntervals(Intervals.ordered(Intervals.mask("field2", Intervals.term("xanadu")), Intervals.term("interest")), + "field1", 1, new int[][]{ + { 1, 2 }, + {}, + {}, + {}, + {}, + {}, + {} + }); + } + } From f5f60b45fee50add65e7260654ce3e2222df0170 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 8 Mar 2018 
10:35:29 +0000 Subject: [PATCH 27/83] javadocs, fix ORDERED contract --- .../lucene/search/IntervalFunction.java | 2 +- .../lucene/search/IntervalIterator.java | 22 ++++++++++++++++--- .../apache/lucene/search/TestIntervals.java | 17 ++++++++++++-- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 860815ab99c4..17cb5aa1d8ff 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -136,7 +136,7 @@ public int end() { public void reset() throws IOException { subIterators.get(0).nextInterval(); i = 1; - start = end = Integer.MIN_VALUE; + start = end = -1; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index 9dc8a0cd9ab4..1e5a1f80def7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -20,8 +20,8 @@ import java.io.IOException; /** - * Defines methods to iterate over the intervals that a {@link Scorer} matches - * on a document + * Defines methods to iterate over the intervals that a term, phrase or more + * complex positional query matches on a document */ public interface IntervalIterator { @@ -31,17 +31,28 @@ public interface IntervalIterator { */ int NO_MORE_INTERVALS = Integer.MAX_VALUE; + /** + * An iterator over documents that might have matching intervals + */ DocIdSetIterator approximation(); - boolean advanceTo(int doc) throws IOException; + /** + * Advances the iterator to {@code target}, returning {@code false} if there + * are definitely no matching intervals + */ + boolean advanceTo(int target) throws IOException; /** * The start of the current interval + * + * Returns -1 if {@link 
#nextInterval()} has not yet been called */ int start(); /** * The end of the current interval + * + * Returns -1 if {@link #nextInterval()} has not yet been called */ int end(); @@ -60,6 +71,11 @@ default float score() { return (float) (1.0 / (end() - start() + 1)); } + /** + * An indication of the cost of finding the next interval + * + * @see TwoPhaseIterator#matchCost() + */ float cost(); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 0ee7defa3db7..d63438043883 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -87,7 +87,7 @@ public static void teardownIndex() throws IOException { private void checkIntervals(IntervalsSource source, String field, int expectedMatchCount, int[][] expected) throws IOException { int matchedDocs = 0; for (LeafReaderContext ctx : searcher.leafContexts) { - // assertNull(source.intervals(field + "1", ctx)); + assertNull(source.intervals(field + "fake", ctx)); NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); IntervalIterator intervals = source.intervals(field, ctx); if (intervals == null) @@ -97,8 +97,10 @@ private void checkIntervals(IntervalsSource source, String field, int expectedMa int id = (int) ids.longValue(); if (intervals.advanceTo(doc)) { int i = 0, pos; + assertEquals(-1, intervals.start()); + assertEquals(-1, intervals.end()); while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) { - System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); + //System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); assertEquals(expected[id][i + 1], intervals.end()); @@ -139,6 +141,17 @@ public void testOrderedNearIntervals() throws IOException { }); } + public void 
testPhraseIntervals() throws IOException { + checkIntervals(Intervals.phrase("pease", "porridge"), "field1", 3, new int[][]{ + {}, + { 0, 1, 3, 4, 6, 7 }, + { 0, 1, 3, 4, 6, 7 }, + {}, + { 0, 1, 3, 4, 6, 7 }, + {} + }); + } + public void testUnorderedNearIntervals() throws IOException { checkIntervals(Intervals.unordered(Intervals.term("pease"), Intervals.term("hot")), "field1", 4, new int[][]{ From 457319a20b4a85c83c06dd9b1c954584b518311e Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 8 Mar 2018 17:53:05 +0000 Subject: [PATCH 28/83] Simplify advanceTo(int) -> reset() --- .../search/ConjunctionIntervalIterator.java | 12 ---------- .../search/DifferenceIntervalFunction.java | 12 ++++++---- .../search/DisjunctionIntervalsSource.java | 23 +++++-------------- .../lucene/search/FilterIntervalIterator.java | 4 ++-- .../lucene/search/IntervalFunction.java | 15 ++++++++++-- .../lucene/search/IntervalIterator.java | 6 +---- .../apache/lucene/search/IntervalScorer.java | 3 ++- .../lucene/search/TermIntervalsSource.java | 11 ++------- .../apache/lucene/search/TestIntervals.java | 4 +++- 9 files changed, 37 insertions(+), 53 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java index f6457d2ffd75..0290bcf982f1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java @@ -46,18 +46,6 @@ public final DocIdSetIterator approximation() { return approximation; } - @Override - public final boolean advanceTo(int doc) throws IOException { - for (IntervalIterator it : subIterators) { - if (it.advanceTo(doc) == false) - return false; - } - reset(); - return true; - } - - protected abstract void reset() throws IOException; - @Override public final float cost() { return cost; diff --git 
a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index e5310f9482fd..fca1562beb49 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -96,11 +96,15 @@ public int end() { } @Override - public boolean advanceTo(int doc) throws IOException { - bpos = b.advanceTo(doc); - if (bpos) + public void reset() throws IOException { + int doc = a.approximation().docID(); + bpos = b.approximation().docID() == doc || + (b.approximation().docID() < doc && b.approximation().advance(doc) == doc); + if (bpos) { + b.reset(); bpos = b.nextInterval() != NO_MORE_INTERVALS; - return a.advanceTo(doc); + } + a.reset(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java index 1327c131d894..4308e5065c5b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -127,21 +127,14 @@ public int end() { } @Override - public boolean advanceTo(int doc) throws IOException { + public void reset() throws IOException { intervalQueue.clear(); - int approxDoc = this.approximation.docID(); - if (approxDoc > doc || (approxDoc != doc && this.approximation.advance(doc) != doc)) { - return false; - } for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { - IntervalIterator it = dw.intervals; - if (it.advanceTo(doc)) { - it.nextInterval(); - intervalQueue.add(it); - } + dw.intervals.reset(); + dw.intervals.nextInterval(); + intervalQueue.add(dw.intervals); } current = UNPOSITIONED; - return intervalQueue.size() > 0; } @Override @@ -183,9 +176,7 @@ public DocIdSetIterator approximation() { } @Override - public boolean 
advanceTo(int doc) throws IOException { - return false; - } + public void reset() throws IOException { } @Override public int start() { @@ -215,9 +206,7 @@ public DocIdSetIterator approximation() { } @Override - public boolean advanceTo(int doc) throws IOException { - return false; - } + public void reset() throws IOException { } @Override public int start() { diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java index bb4d736fe3ec..ddb2aa358e09 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java @@ -43,8 +43,8 @@ public DocIdSetIterator approximation() { } @Override - public boolean advanceTo(int doc) throws IOException { - return in.advanceTo(doc); + public void reset() throws IOException { + in.reset(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 17cb5aa1d8ff..c3e985a6332a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -58,7 +58,10 @@ private static class BlockIntervalIterator extends ConjunctionIntervalIterator { } @Override - protected void reset() throws IOException { + public void reset() throws IOException { + for (IntervalIterator it : subIterators) { + it.reset(); + } start = end = -1; } @@ -134,6 +137,9 @@ public int end() { @Override public void reset() throws IOException { + for (IntervalIterator it : subIterators) { + it.reset(); + } subIterators.get(0).nextInterval(); i = 1; start = end = -1; @@ -213,6 +219,7 @@ public void reset() throws IOException { this.queue.clear(); this.queueEnd = start = end = -1; for (IntervalIterator subIterator : subIterators) { + subIterator.reset(); 
subIterator.nextInterval(); queue.add(subIterator); if (subIterator.end() > queueEnd) { @@ -280,7 +287,9 @@ public int end() { } @Override - public void reset() { + public void reset() throws IOException { + a.reset(); + b.reset(); bpos = true; } @@ -328,6 +337,8 @@ public int end() { @Override public void reset() throws IOException { + a.reset(); + b.reset(); bpos = true; } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index 1e5a1f80def7..a14609474615 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -36,11 +36,7 @@ public interface IntervalIterator { */ DocIdSetIterator approximation(); - /** - * Advances the iterator to {@code target}, returning {@code false} if there - * are definitely no matching intervals - */ - boolean advanceTo(int target) throws IOException; + void reset() throws IOException; /** * The start of the current interval diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index d70e93d7705c..b0b8fc24a496 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -81,7 +81,8 @@ public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { - return intervals.advanceTo(docID()) && intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; + intervals.reset(); + return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java index 4f7650e0df3d..95e772ca142b 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -55,16 +55,9 @@ public DocIdSetIterator approximation() { } @Override - public boolean advanceTo(int doc) throws IOException { + public void reset() throws IOException { pos = -1; - if (pe.docID() > doc || (pe.docID() != doc && pe.advance(doc) != doc)) { - upto = -1; - return false; - } - else { - upto = pe.freq(); - return true; - } + upto = pe.freq(); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index d63438043883..bc89efc70f63 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -95,7 +95,9 @@ private void checkIntervals(IntervalsSource source, String field, int expectedMa for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { ids.advance(doc); int id = (int) ids.longValue(); - if (intervals.advanceTo(doc)) { + if (intervals.approximation().docID() == doc || + (intervals.approximation().docID() < doc && intervals.approximation().advance(doc) == doc)) { + intervals.reset(); int i = 0, pos; assertEquals(-1, intervals.start()); assertEquals(-1, intervals.end()); From 73fda3f54ccf717746012f60b0b7634bfc55d900 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 9 Mar 2018 10:05:23 +0000 Subject: [PATCH 29/83] IntervalFunction and DifferenceIntervalFunction are package-private --- .../lucene/search/DifferenceIntervalFunction.java | 10 +++++----- .../org/apache/lucene/search/IntervalFunction.java | 12 ++++++------ .../src/java/org/apache/lucene/search/Intervals.java | 3 +-- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index 
fca1562beb49..0b93ffb573f7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -24,7 +24,7 @@ * A function that takes two interval iterators and combines them to produce a third, * generally by computing a difference interval between them */ -public abstract class DifferenceIntervalFunction { +abstract class DifferenceIntervalFunction { @Override public abstract int hashCode(); @@ -44,7 +44,7 @@ public abstract class DifferenceIntervalFunction { * Filters the minuend iterator so that only intervals that do not overlap intervals from the * subtrahend iterator are returned */ - public static final DifferenceIntervalFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { + static final DifferenceIntervalFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { return new NonOverlappingIterator(minuend, subtrahend); @@ -55,7 +55,7 @@ public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrah * Filters the minuend iterator so that only intervals that do not contain intervals from the * subtrahend iterator are returned */ - public static final DifferenceIntervalFunction NOT_CONTAINING = new SingletonFunction("NOT_CONTAINING") { + static final DifferenceIntervalFunction NOT_CONTAINING = new SingletonFunction("NOT_CONTAINING") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { return new NotContainingIterator(minuend, subtrahend); @@ -66,7 +66,7 @@ public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrah * Filters the minuend iterator so that only intervals that are not contained by intervals from * the subtrahend iterator are returned */ - public static final DifferenceIntervalFunction NOT_CONTAINED_BY = new 
SingletonFunction("NOT_CONTAINED_BY") { + static final DifferenceIntervalFunction NOT_CONTAINED_BY = new SingletonFunction("NOT_CONTAINED_BY") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { return new NotContainedByIterator(minuend, subtrahend); @@ -146,7 +146,7 @@ public int nextInterval() throws IOException { * Filters the minuend iterator so that only intervals that do not occur within a set number * of positions of intervals from the subtrahend iterator are returned */ - public static class NotWithinFunction extends DifferenceIntervalFunction { + static class NotWithinFunction extends DifferenceIntervalFunction { private final int positions; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index c3e985a6332a..d715d635c203 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -26,7 +26,7 @@ /** * Combine a list of {@link IntervalIterator}s into another */ -public abstract class IntervalFunction { +abstract class IntervalFunction { @Override public abstract int hashCode(); @@ -42,7 +42,7 @@ public abstract class IntervalFunction { */ public abstract IntervalIterator apply(List iterators); - public static final IntervalFunction BLOCK = new SingletonFunction("BLOCK") { + static final IntervalFunction BLOCK = new SingletonFunction("BLOCK") { @Override public IntervalIterator apply(List iterators) { return new BlockIntervalIterator(iterators); @@ -108,7 +108,7 @@ public int nextInterval() throws IOException { /** * Return an iterator over intervals where the subiterators appear in a given order */ - public static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { + static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { @Override public IntervalIterator apply(List 
intervalIterators) { return new OrderedIntervalIterator(intervalIterators); @@ -175,7 +175,7 @@ public int nextInterval() throws IOException { /** * Return an iterator over intervals where the subiterators appear in any order */ - public static final IntervalFunction UNORDERED = new SingletonFunction("UNORDERED") { + static final IntervalFunction UNORDERED = new SingletonFunction("UNORDERED") { @Override public IntervalIterator apply(List intervalIterators) { return new UnorderedIntervalIterator(intervalIterators); @@ -265,7 +265,7 @@ public int nextInterval() throws IOException { /** * Returns an interval over iterators where the first iterator contains intervals from the second */ - public static final IntervalFunction CONTAINING = new SingletonFunction("CONTAINING") { + static final IntervalFunction CONTAINING = new SingletonFunction("CONTAINING") { @Override public IntervalIterator apply(List iterators) { if (iterators.size() != 2) @@ -314,7 +314,7 @@ public int nextInterval() throws IOException { /** * Return an iterator over intervals where the first iterator is contained by intervals from the second */ - public static final IntervalFunction CONTAINED_BY = new SingletonFunction("CONTAINED_BY") { + static final IntervalFunction CONTAINED_BY = new SingletonFunction("CONTAINED_BY") { @Override public IntervalIterator apply(List iterators) { if (iterators.size() != 2) diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index 5f15df2b94d1..f7531fc75def 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -29,8 +29,7 @@ /** * Constructor functions for {@link IntervalsSource} types * - * These queries use {@link IntervalFunction} or {@link DifferenceIntervalFunction} - * classes, implementing minimum-interval algorithms taken from the paper + * These sources implement minimum-interval 
algorithms taken from the paper * * Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics */ From 5311c75b1aac6a03afe60d0d5a6fbbdd06c651c7 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 9 Mar 2018 11:51:48 +0000 Subject: [PATCH 30/83] feedback --- .../lucene/search/IntervalIterator.java | 11 ++++- .../org/apache/lucene/search/Intervals.java | 44 ------------------- .../apache/lucene/search/IntervalsSource.java | 19 +++++++- .../lucene/search/TermIntervalsSource.java | 3 ++ .../apache/lucene/search/TestIntervals.java | 22 ++++------ 5 files changed, 39 insertions(+), 60 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index a14609474615..740ba97e7966 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -22,6 +22,11 @@ /** * Defines methods to iterate over the intervals that a term, phrase or more * complex positional query matches on a document + * + * The iterator is advanced by calling {@link DocIdSetIterator#advance(int)} on the + * DocIdSetIterator returned by {@link #approximation()}. Consumers should then call + * {@link #reset()}, and then {@link #nextInterval()} to retrieve intervals until + * {@link #NO_MORE_INTERVALS} is returned. */ public interface IntervalIterator { @@ -36,6 +41,10 @@ public interface IntervalIterator { */ DocIdSetIterator approximation(); + /** + * Prepare to iterate over the intervals in a document after the approximation + * {@link DocIdSetIterator} has been advanced. 
+ */ void reset() throws IOException; /** @@ -68,7 +77,7 @@ default float score() { } /** - * An indication of the cost of finding the next interval + * An indication of the average cost of iterating over all intervals in a document * * @see TwoPhaseIterator#matchCost() */ diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index f7531fc75def..aa29fa2f4b99 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -188,50 +188,6 @@ public static IntervalsSource containedBy(IntervalsSource small, IntervalsSource return new ConjunctionIntervalsSource(Arrays.asList(small, big), IntervalFunction.CONTAINED_BY); } - public static IntervalsSource mask(String field, IntervalsSource in) { - return new FieldMaskIntervalsSource(field, in); - } - - private static class FieldMaskIntervalsSource extends IntervalsSource { - - final String field; - final IntervalsSource in; - - private FieldMaskIntervalsSource(String field, IntervalsSource in) { - this.field = field; - this.in = in; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - FieldMaskIntervalsSource that = (FieldMaskIntervalsSource) o; - return Objects.equals(field, that.field) && - Objects.equals(in, that.in); - } - - @Override - public String toString() { - return "XFIELD/" + field + "(" + in + ")"; - } - - @Override - public void extractTerms(String field, Set terms) { - in.extractTerms(field, terms); - } - - @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { - return in.intervals(this.field, ctx); - } - - @Override - public int hashCode() { - return Objects.hash(field, in); - } - } - // TODO: beforeQuery, afterQuery, arbitrary IntervalFunctions } diff --git 
a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java index bb0362171c5a..86905020f76c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java @@ -24,10 +24,27 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; +/** + * A helper class for {@link IntervalQuery} that provides an {@link IntervalIterator} + * for a given field and segment + */ public abstract class IntervalsSource { + /** + * Create an {@link IntervalIterator} exposing the minimum intervals defined by this {@link IntervalsSource} + * + * @param field the field to read positions from + * @param ctx the context for which to return the iterator + */ public abstract IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException; + /** + * Expert: collect {@link Term} objects from this source, to be used for top-level term scoring + * @param field the field to be scored + * @param terms a {@link Set} which terms should be added to + */ + public abstract void extractTerms(String field, Set terms); + @Override public abstract int hashCode(); @@ -37,6 +54,4 @@ public abstract class IntervalsSource { @Override public abstract String toString(); - public abstract void extractTerms(String field, Set terms); - } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java index 95e772ca142b..25139bb86ca3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -41,6 +41,9 @@ public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IO Terms terms = ctx.reader().terms(field); if (terms == null) return null; + if (terms.hasPositions() == false) { + throw new 
IllegalArgumentException("Cannot create an IntervalIterator over field " + field + " because it has no indexed positions"); + } TermsEnum te = terms.iterator(); te.seekExact(term); PostingsEnum pe = te.postings(null, PostingsEnum.POSITIONS); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index bc89efc70f63..92cd8483b970 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -24,6 +24,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocValues; @@ -72,6 +73,7 @@ public static void setupIndex() throws IOException { Document doc = new Document(); doc.add(new TextField("field1", field1_docs[i], Field.Store.NO)); doc.add(new TextField("field2", field2_docs[i], Field.Store.NO)); + doc.add(new StringField("id", Integer.toString(i), Field.Store.NO)); doc.add(new NumericDocValuesField("id", i)); writer.addDocument(doc); } @@ -120,6 +122,13 @@ private void checkIntervals(IntervalsSource source, String field, int expectedMa assertEquals(expectedMatchCount, matchedDocs); } + public void testIntervalsOnFieldWithNoPositions() throws IOException { + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> { + Intervals.term("wibble").intervals("id", searcher.leafContexts.get(0)); + }); + assertEquals("Cannot create an IntervalIterator over field id because it has no indexed positions", e.getMessage()); + } + public void testTermQueryIntervals() throws IOException { checkIntervals(Intervals.term("porridge"), "field1", 4, new int[][]{ {}, @@ -189,17 +198,4 @@ public void testNesting() throws IOException 
{ }); } - public void testCrossFieldMasking() throws IOException { - checkIntervals(Intervals.ordered(Intervals.mask("field2", Intervals.term("xanadu")), Intervals.term("interest")), - "field1", 1, new int[][]{ - { 1, 2 }, - {}, - {}, - {}, - {}, - {}, - {} - }); - } - } From 1e7ee235d98aaa4a2d83be2ec45eeab031b78ad1 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 9 Mar 2018 17:05:46 +0000 Subject: [PATCH 31/83] Remove specialised score() method on IntervalIterator --- .../lucene/search/FilterIntervalIterator.java | 5 ----- .../org/apache/lucene/search/IntervalFunction.java | 5 ----- .../org/apache/lucene/search/IntervalIterator.java | 7 ------- .../org/apache/lucene/search/IntervalScorer.java | 2 +- .../apache/lucene/search/TermIntervalsSource.java | 5 ----- .../org/apache/lucene/search/TestIntervalQuery.java | 13 ------------- 6 files changed, 1 insertion(+), 36 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java index ddb2aa358e09..45b9870782d2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java @@ -52,11 +52,6 @@ public int nextInterval() throws IOException { return in.nextInterval(); } - @Override - public float score() { - return in.score(); - } - @Override public float cost() { return in.cost(); diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index d715d635c203..d2eee57a6f26 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -75,11 +75,6 @@ public int end() { return end; } - @Override - public float score() { - return 1; - } - @Override public int nextInterval() throws IOException { if 
(subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index 740ba97e7966..d5911accbe4d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -69,13 +69,6 @@ public interface IntervalIterator { */ int nextInterval() throws IOException; - /** - * The score of the current interval - */ - default float score() { - return (float) (1.0 / (end() - start() + 1)); - } - /** * An indication of the average cost of iterating over all intervals in a document * diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index b0b8fc24a496..c1b73b684bea 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -65,7 +65,7 @@ private void ensureFreq() throws IOException { lastScoredDoc = docID(); freq = 0; do { - freq += intervals.score(); + freq += (1.0 / (intervals.end() - intervals.start() + 1)); } while (intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS); } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java index 25139bb86ca3..224563943088 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -81,11 +81,6 @@ public int nextInterval() throws IOException { return pos = pe.nextPosition(); } - @Override - public float score() { - return 1; - } - @Override public float cost() { return cost; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java 
b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 6aa4f833af05..6c1ba8ff6c1b 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -73,19 +73,6 @@ private void checkHits(Query query, int[] results) throws IOException { CheckHits.checkHits(random(), query, field, searcher, results); } - public void testScoring() throws IOException { - PhraseQuery pq = new PhraseQuery.Builder().add(new Term(field, "w2")).add(new Term(field, "w3")).build(); - Query equiv = new IntervalQuery(field, Intervals.phrase("w2", "w3")); - - TopDocs td1 = searcher.search(pq, 10); - TopDocs td2 = searcher.search(equiv, 10); - assertEquals(td1.totalHits, td2.totalHits); - for (int i = 0; i < td1.scoreDocs.length; i++) { - assertEquals(td1.scoreDocs[i].doc, td2.scoreDocs[i].doc); - assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 0f); - } - } - public void testPhraseQuery() throws IOException { checkHits(new IntervalQuery(field, Intervals.phrase(Intervals.term("w1"), Intervals.term("w2"))), new int[]{0}); From 86a98e16f75e1a3503afe8dd00ca59d275bb80fd Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 9 Mar 2018 17:58:29 +0000 Subject: [PATCH 32/83] Remove reset() and track via the approximation docid --- .../search/DifferenceIntervalFunction.java | 19 ++-- .../search/DisjunctionIntervalsSource.java | 58 ++-------- .../lucene/search/FilterIntervalIterator.java | 5 - .../lucene/search/IntervalFunction.java | 102 +++++++++--------- .../lucene/search/IntervalIterator.java | 6 -- .../apache/lucene/search/IntervalScorer.java | 1 - .../lucene/search/TermIntervalsSource.java | 18 ++-- .../apache/lucene/search/TestIntervals.java | 1 - 8 files changed, 83 insertions(+), 127 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index 
0b93ffb573f7..c6e7ff64688f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -79,6 +79,7 @@ private static abstract class RelativeIterator implements IntervalIterator { final IntervalIterator b; boolean bpos; + int doc = -1; RelativeIterator(IntervalIterator a, IntervalIterator b) { this.a = a; @@ -95,16 +96,13 @@ public int end() { return a.end(); } - @Override - public void reset() throws IOException { - int doc = a.approximation().docID(); - bpos = b.approximation().docID() == doc || - (b.approximation().docID() < doc && b.approximation().advance(doc) == doc); - if (bpos) { - b.reset(); - bpos = b.nextInterval() != NO_MORE_INTERVALS; + protected void checkDoc() throws IOException { + if (doc != a.approximation().docID()) { + doc = a.approximation().docID(); + bpos = (b.approximation().docID() == doc || + (b.approximation().docID() < doc && b.approximation().advance(doc) == doc)) && + b.nextInterval() != NO_MORE_INTERVALS; } - a.reset(); } @Override @@ -126,6 +124,7 @@ private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtra @Override public int nextInterval() throws IOException { + checkDoc(); if (bpos == false) return a.nextInterval(); while (a.nextInterval() != NO_MORE_INTERVALS) { @@ -203,6 +202,7 @@ private NotContainingIterator(IntervalIterator minuend, IntervalIterator subtrah @Override public int nextInterval() throws IOException { + checkDoc(); if (bpos == false) return a.nextInterval(); while (a.nextInterval() != NO_MORE_INTERVALS) { @@ -228,6 +228,7 @@ private static class NotContainedByIterator extends RelativeIterator { @Override public int nextInterval() throws IOException { + checkDoc(); if (bpos == false) return a.nextInterval(); while (a.nextInterval() != NO_MORE_INTERVALS) { diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java 
b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java index 4308e5065c5b..3ba5538eff82 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -83,7 +83,8 @@ private static class DisjunctionIntervalIterator implements IntervalIterator { final List iterators; final float matchCost; - IntervalIterator current; + IntervalIterator current = EMPTY; + int doc = -1; DisjunctionIntervalIterator(List iterators) { this.iterators = iterators; @@ -126,20 +127,15 @@ public int end() { return current.end(); } - @Override - public void reset() throws IOException { - intervalQueue.clear(); - for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { - dw.intervals.reset(); - dw.intervals.nextInterval(); - intervalQueue.add(dw.intervals); - } - current = UNPOSITIONED; - } - @Override public int nextInterval() throws IOException { - if (current == UNPOSITIONED) { + if (doc != approximation.docID()) { + doc = approximation.docID(); + intervalQueue.clear(); + for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { + dw.intervals.nextInterval(); + intervalQueue.add(dw.intervals); + } current = intervalQueue.top(); return current.start(); } @@ -175,39 +171,6 @@ public DocIdSetIterator approximation() { throw new UnsupportedOperationException(); } - @Override - public void reset() throws IOException { } - - @Override - public int start() { - return NO_MORE_INTERVALS; - } - - @Override - public int end() { - return NO_MORE_INTERVALS; - } - - @Override - public int nextInterval() throws IOException { - return NO_MORE_INTERVALS; - } - - @Override - public float cost() { - return 0; - } - }; - - private static final IntervalIterator UNPOSITIONED = new IntervalIterator() { - @Override - public DocIdSetIterator approximation() { - throw new UnsupportedOperationException(); - } - - @Override - public void reset() throws 
IOException { } - @Override public int start() { return -1; @@ -219,7 +182,7 @@ public int end() { } @Override - public int nextInterval() throws IOException { + public int nextInterval() { return NO_MORE_INTERVALS; } @@ -228,4 +191,5 @@ public float cost() { return 0; } }; + } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java index 45b9870782d2..cf1843a1e6d0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java @@ -42,11 +42,6 @@ public DocIdSetIterator approximation() { return in.approximation(); } - @Override - public void reset() throws IOException { - in.reset(); - } - @Override public int nextInterval() throws IOException { return in.nextInterval(); diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index d2eee57a6f26..5df8c85031cd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -51,32 +51,34 @@ public IntervalIterator apply(List iterators) { private static class BlockIntervalIterator extends ConjunctionIntervalIterator { - int start, end; + int doc = -1, start = -1, end = -1; BlockIntervalIterator(List subIterators) { super(subIterators); } - @Override - public void reset() throws IOException { - for (IntervalIterator it : subIterators) { - it.reset(); - } - start = end = -1; - } - @Override public int start() { + if (doc != approximation.docID()) { + return -1; + } return start; } @Override public int end() { + if (doc != approximation.docID()) { + return -1; + } return end; } @Override public int nextInterval() throws IOException { + if (doc != approximation.docID()) { + doc = approximation.docID(); + start = end = -1; + } if 
(subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) return NO_MORE_INTERVALS; int i = 1; @@ -112,9 +114,7 @@ public IntervalIterator apply(List intervalIterators) { private static class OrderedIntervalIterator extends ConjunctionIntervalIterator { - int start; - int end; - int i; + int doc = -1, start = -1, end = -1, i; private OrderedIntervalIterator(List subIntervals) { super(subIntervals); @@ -122,26 +122,28 @@ private OrderedIntervalIterator(List subIntervals) { @Override public int start() { + if (doc != approximation.docID()) { + return -1; + } return start; } @Override public int end() { - return end; - } - - @Override - public void reset() throws IOException { - for (IntervalIterator it : subIterators) { - it.reset(); + if (doc != approximation.docID()) { + return -1; } - subIterators.get(0).nextInterval(); - i = 1; - start = end = -1; + return end; } @Override public int nextInterval() throws IOException { + if (doc != approximation.docID()) { + doc = approximation.docID(); + subIterators.get(0).nextInterval(); + i = 1; + start = end = -1; + } start = end = NO_MORE_INTERVALS; int b = Integer.MAX_VALUE; while (true) { @@ -182,7 +184,7 @@ private static class UnorderedIntervalIterator extends ConjunctionIntervalIterat private final PriorityQueue queue; private final IntervalIterator[] subIterators; - int start, end, queueEnd; + int doc = -1, start = -1, end = -1, queueEnd; UnorderedIntervalIterator(List subIterators) { super(subIterators); @@ -201,26 +203,18 @@ protected boolean lessThan(IntervalIterator a, IntervalIterator b) { @Override public int start() { + if (doc != approximation.docID()) { + return -1; + } return start; } @Override public int end() { - return end; - } - - @Override - public void reset() throws IOException { - this.queue.clear(); - this.queueEnd = start = end = -1; - for (IntervalIterator subIterator : subIterators) { - subIterator.reset(); - subIterator.nextInterval(); - queue.add(subIterator); - if (subIterator.end() > 
queueEnd) { - queueEnd = subIterator.end(); - } + if (doc != approximation.docID()) { + return -1; } + return end; } void updateRightExtreme(IntervalIterator it) { @@ -232,6 +226,16 @@ void updateRightExtreme(IntervalIterator it) { @Override public int nextInterval() throws IOException { + if (doc != approximation.docID()) { + doc = approximation.docID(); + this.queue.clear(); + this.queueEnd = start = end = -1; + for (IntervalIterator it : subIterators) { + it.nextInterval(); + queue.add(it); + updateRightExtreme(it); + } + } while (this.queue.size() == subIterators.length && queue.top().start() == start) { IntervalIterator it = queue.pop(); if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { @@ -270,6 +274,7 @@ public IntervalIterator apply(List iterators) { return new ConjunctionIntervalIterator(iterators) { boolean bpos; + int doc = -1; @Override public int start() { @@ -281,15 +286,12 @@ public int end() { return a.end(); } - @Override - public void reset() throws IOException { - a.reset(); - b.reset(); - bpos = true; - } - @Override public int nextInterval() throws IOException { + if (doc != approximation.docID()) { + doc = approximation.docID(); + bpos = true; + } if (bpos == false) return NO_MORE_INTERVALS; while (a.nextInterval() != NO_MORE_INTERVALS) { @@ -319,6 +321,7 @@ public IntervalIterator apply(List iterators) { return new ConjunctionIntervalIterator(iterators) { boolean bpos; + int doc = -1; @Override public int start() { @@ -330,15 +333,12 @@ public int end() { return a.end(); } - @Override - public void reset() throws IOException { - a.reset(); - b.reset(); - bpos = true; - } - @Override public int nextInterval() throws IOException { + if (doc != approximation.docID()) { + doc = approximation.docID(); + bpos = true; + } if (bpos == false) return NO_MORE_INTERVALS; while (a.nextInterval() != NO_MORE_INTERVALS) { diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java 
b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index d5911accbe4d..c41888b91e87 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -41,12 +41,6 @@ public interface IntervalIterator { */ DocIdSetIterator approximation(); - /** - * Prepare to iterate over the intervals in a document after the approximation - * {@link DocIdSetIterator} has been advanced. - */ - void reset() throws IOException; - /** * The start of the current interval * diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index c1b73b684bea..279265f3dbff 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -81,7 +81,6 @@ public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { - intervals.reset(); return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java index 224563943088..a9eb01e3937c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -50,31 +50,35 @@ public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IO float cost = PhraseQuery.termPositionsCost(te); return new IntervalIterator() { - int pos, upto; + int doc = -1, pos = -1, upto; @Override public DocIdSetIterator approximation() { return pe; } - @Override - public void reset() throws IOException { - pos = -1; - upto = pe.freq(); - } - @Override public int start() { + if (doc != pe.docID()) { + return -1; + } return pos; } @Override 
public int end() { + if (doc != pe.docID()) { + return -1; + } return pos; } @Override public int nextInterval() throws IOException { + if (doc != pe.docID()) { + doc = pe.docID(); + upto = pe.freq(); + } if (upto <= 0) return pos = NO_MORE_INTERVALS; upto--; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 92cd8483b970..ec28cef73b2d 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -99,7 +99,6 @@ private void checkIntervals(IntervalsSource source, String field, int expectedMa int id = (int) ids.longValue(); if (intervals.approximation().docID() == doc || (intervals.approximation().docID() < doc && intervals.approximation().advance(doc) == doc)) { - intervals.reset(); int i = 0, pos; assertEquals(-1, intervals.start()); assertEquals(-1, intervals.end()); From 580c155e0a262fa58194f654e4eb0e065546e3e3 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 9 Mar 2018 20:53:19 +0000 Subject: [PATCH 33/83] javadocs --- .../apache/lucene/search/DisjunctionIntervalsSource.java | 6 ++++++ .../src/java/org/apache/lucene/search/IntervalIterator.java | 3 +-- .../src/java/org/apache/lucene/search/IntervalsSource.java | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java index 3ba5538eff82..b2b5840bb335 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -119,11 +119,17 @@ public float cost() { @Override public int start() { + if (doc != approximation.docID()) { + return -1; + } return current.start(); } @Override public int end() { + if (doc != approximation.docID()) { + return -1; 
+ } return current.end(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index c41888b91e87..7dea05990ae3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -25,8 +25,7 @@ * * The iterator is advanced by calling {@link DocIdSetIterator#advance(int)} on the * DocIdSetIterator returned by {@link #approximation()}. Consumers should then call - * {@link #reset()}, and then {@link #nextInterval()} to retrieve intervals until - * {@link #NO_MORE_INTERVALS} is returned. + * {@link #nextInterval()} to retrieve intervals until {@link #NO_MORE_INTERVALS} is returned. */ public interface IntervalIterator { diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java index 86905020f76c..e1d2fe1598cb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java @@ -33,6 +33,8 @@ public abstract class IntervalsSource { /** * Create an {@link IntervalIterator} exposing the minimum intervals defined by this {@link IntervalsSource} * + * Returns {@code null} if no intervals for this field exist in this segment + * * @param field the field to read positions from * @param ctx the context for which to return the iterator */ From 139b32759f198621831eb04202a2a8b8fc9e2543 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Sun, 11 Mar 2018 20:17:58 +0000 Subject: [PATCH 34/83] Make IntervalIterator a DISI --- .../apache/lucene/search/ConjunctionDISI.java | 2 +- .../search/ConjunctionIntervalIterator.java | 23 ++--- .../search/DifferenceIntervalFunction.java | 60 ++++++++----- .../org/apache/lucene/search/DisiWrapper.java | 8 +- .../search/DisjunctionIntervalsSource.java | 59 +++++++------ 
.../lucene/search/FilterIntervalIterator.java | 54 ------------ .../apache/lucene/search/IntervalFilter.java | 27 +++++- .../lucene/search/IntervalFunction.java | 88 ++++++++----------- .../lucene/search/IntervalIterator.java | 61 ++++++++++--- .../apache/lucene/search/IntervalScorer.java | 6 +- .../lucene/search/TermIntervalsSource.java | 33 ++++--- .../apache/lucene/search/TestIntervals.java | 6 +- 12 files changed, 208 insertions(+), 219 deletions(-) delete mode 100644 lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java index 780e854033a8..8ed42316a09f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java @@ -58,7 +58,7 @@ public static DocIdSetIterator intersectScorers(Collection scorers) { * returned {@link DocIdSetIterator} might leverage two-phase iteration in * which case it is possible to retrieve the {@link TwoPhaseIterator} using * {@link TwoPhaseIterator#unwrap}. 
*/ - public static DocIdSetIterator intersectIterators(List iterators) { + public static DocIdSetIterator intersectIterators(List iterators) { if (iterators.size() < 2) { throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java index 0290bcf982f1..cfda0de03f50 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java @@ -21,38 +21,25 @@ import java.util.ArrayList; import java.util.List; -abstract class ConjunctionIntervalIterator implements IntervalIterator { +abstract class ConjunctionIntervalIterator extends IntervalIterator { - protected final List subIterators; + final List subIterators; - final DocIdSetIterator approximation; final float cost; ConjunctionIntervalIterator(List subIterators) { + super(ConjunctionDISI.intersectIterators(subIterators)); this.subIterators = subIterators; float costsum = 0; - List approximations = new ArrayList<>(); for (IntervalIterator it : subIterators) { - costsum += it.cost(); - approximations.add(it.approximation()); + costsum += it.matchCost(); } this.cost = costsum; - this.approximation = ConjunctionDISI.intersectIterators(approximations); - - } - - @Override - public final DocIdSetIterator approximation() { - return approximation; } @Override - public final float cost() { + public final float matchCost() { return cost; } - @Override - public String toString() { - return approximation.docID() + ":[" + start() + "->" + end() + "]"; - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index c6e7ff64688f..bc89bfed70b3 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -73,19 +73,26 @@ public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrah } }; - private static abstract class RelativeIterator implements IntervalIterator { + private static abstract class RelativeIterator extends IntervalIterator { final IntervalIterator a; final IntervalIterator b; boolean bpos; - int doc = -1; RelativeIterator(IntervalIterator a, IntervalIterator b) { + super(a); this.a = a; this.b = b; } + @Override + protected void reset() throws IOException { + int doc = a.docID(); + bpos = b.docID() == doc || + (b.docID() < doc && b.advance(doc) == doc); + } + @Override public int start() { return a.start(); @@ -96,23 +103,9 @@ public int end() { return a.end(); } - protected void checkDoc() throws IOException { - if (doc != a.approximation().docID()) { - doc = a.approximation().docID(); - bpos = (b.approximation().docID() == doc || - (b.approximation().docID() < doc && b.approximation().advance(doc) == doc)) && - b.nextInterval() != NO_MORE_INTERVALS; - } - } - @Override - public DocIdSetIterator approximation() { - return a.approximation(); - } - - @Override - public float cost() { - return a.cost() + b.cost(); + public float matchCost() { + return a.matchCost() + b.matchCost(); } } @@ -124,7 +117,6 @@ private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtra @Override public int nextInterval() throws IOException { - checkDoc(); if (bpos == false) return a.nextInterval(); while (a.nextInterval() != NO_MORE_INTERVALS) { @@ -173,15 +165,22 @@ public int hashCode() { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - IntervalIterator notWithin = new FilterIntervalIterator(subtrahend) { + IntervalIterator notWithin = new IntervalIterator(subtrahend) { + + boolean positioned = false; + @Override public 
int start() { + if (positioned == false) + return -1; int start = subtrahend.start(); return Math.max(0, start - positions); } @Override public int end() { + if (positioned == false) + return -1; int end = subtrahend.end(); int newEnd = end + positions; if (newEnd < 0) // check for overflow @@ -189,6 +188,25 @@ public int end() { return newEnd; } + @Override + public int nextInterval() throws IOException { + if (positioned == false) { + positioned = true; + } + return subtrahend.nextInterval(); + } + + @Override + public float matchCost() { + return subtrahend.matchCost(); + } + + @Override + protected void reset() throws IOException { + // already called when the subtrahend approximation is advanced + positioned = false; + } + }; return NON_OVERLAPPING.apply(minuend, notWithin); } @@ -202,7 +220,6 @@ private NotContainingIterator(IntervalIterator minuend, IntervalIterator subtrah @Override public int nextInterval() throws IOException { - checkDoc(); if (bpos == false) return a.nextInterval(); while (a.nextInterval() != NO_MORE_INTERVALS) { @@ -228,7 +245,6 @@ private static class NotContainedByIterator extends RelativeIterator { @Override public int nextInterval() throws IOException { - checkDoc(); if (bpos == false) return a.nextInterval(); while (a.nextInterval() != NO_MORE_INTERVALS) { diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index aba5dff908a6..edca0e3a26a6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -92,12 +92,12 @@ public DisiWrapper(IntervalIterator iterator) { this.scorer = null; this.spans = null; this.intervals = iterator; - this.iterator = iterator.approximation(); - this.cost = iterator.approximation().cost(); + this.iterator = iterator; + this.cost = iterator.cost(); this.doc = -1; this.twoPhaseView = null; - this.approximation = 
iterator.approximation(); - this.matchCost = iterator.cost(); + this.approximation = iterator; + this.matchCost = iterator.matchCost(); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java index b2b5840bb335..2f7b01e88a8c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -75,18 +75,18 @@ public void extractTerms(String field, Set terms) { } } - private static class DisjunctionIntervalIterator implements IntervalIterator { + private static class DisjunctionIntervalIterator extends IntervalIterator { final PriorityQueue intervalQueue; final DisiPriorityQueue disiQueue; - final DisjunctionDISIApproximation approximation; final List iterators; final float matchCost; IntervalIterator current = EMPTY; - int doc = -1; DisjunctionIntervalIterator(List iterators) { + super(buildApproximation(iterators)); + this.disiQueue = ((DisjunctionDISIApproximation)approximation).subIterators; this.iterators = iterators; this.intervalQueue = new PriorityQueue(iterators.size()) { @Override @@ -97,52 +97,52 @@ protected boolean lessThan(IntervalIterator a, IntervalIterator b) { //return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); } }; - this.disiQueue = new DisiPriorityQueue(iterators.size()); float costsum = 0; for (IntervalIterator it : iterators) { - this.disiQueue.add(new DisiWrapper(it)); costsum += it.cost(); } this.matchCost = costsum; - this.approximation = new DisjunctionDISIApproximation(this.disiQueue); } - @Override - public DocIdSetIterator approximation() { - return approximation; + private static DocIdSetIterator buildApproximation(List iterators) { + DisiPriorityQueue disiQueue = new DisiPriorityQueue(iterators.size()); + for (IntervalIterator it : iterators) { + disiQueue.add(new DisiWrapper(it)); + } 
+ return new DisjunctionDISIApproximation(disiQueue); } @Override - public float cost() { + public float matchCost() { return matchCost; } @Override public int start() { - if (doc != approximation.docID()) { - return -1; - } return current.start(); } @Override public int end() { - if (doc != approximation.docID()) { - return -1; - } return current.end(); } + @Override + protected void reset() throws IOException { + intervalQueue.clear(); + for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { + dw.intervals.nextInterval(); + intervalQueue.add(dw.intervals); + } + current = EMPTY; + } + @Override public int nextInterval() throws IOException { - if (doc != approximation.docID()) { - doc = approximation.docID(); - intervalQueue.clear(); - for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { - dw.intervals.nextInterval(); - intervalQueue.add(dw.intervals); + if (current == EMPTY) { + if (intervalQueue.size() > 0) { + current = intervalQueue.top(); } - current = intervalQueue.top(); return current.start(); } int start = current.start(), end = current.end(); @@ -171,11 +171,7 @@ private boolean contains(IntervalIterator it, int start, int end) { } - private static final IntervalIterator EMPTY = new IntervalIterator() { - @Override - public DocIdSetIterator approximation() { - throw new UnsupportedOperationException(); - } + private static final IntervalIterator EMPTY = new IntervalIterator(DocIdSetIterator.empty()) { @Override public int start() { @@ -193,9 +189,14 @@ public int nextInterval() { } @Override - public float cost() { + public float matchCost() { return 0; } + + @Override + protected void reset() throws IOException { + + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java deleted file mode 100644 index cf1843a1e6d0..000000000000 --- a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java 
+++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.search; - -import java.io.IOException; - -public abstract class FilterIntervalIterator implements IntervalIterator { - - protected final IntervalIterator in; - - protected FilterIntervalIterator(IntervalIterator in) { - this.in = in; - } - - @Override - public int start() { - return in.start(); - } - - @Override - public int end() { - return in.end(); - } - - @Override - public DocIdSetIterator approximation() { - return in.approximation(); - } - - @Override - public int nextInterval() throws IOException { - return in.nextInterval(); - } - - @Override - public float cost() { - return in.cost(); - } -} diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index aa074039bdd5..3add87d78243 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -22,13 +22,36 @@ /** * Wraps an {@link IntervalIterator} and passes through those intervals that match the {@link #accept()} function */ -public abstract class IntervalFilter extends 
FilterIntervalIterator { +public abstract class IntervalFilter extends IntervalIterator { + + private final IntervalIterator in; /** * Create a new filter */ public IntervalFilter(IntervalIterator in) { - super(in); + super(in.approximation); + this.in = in; + } + + @Override + public int start() { + return in.start(); + } + + @Override + public int end() { + return in.end(); + } + + @Override + public float matchCost() { + return in.matchCost(); + } + + @Override + protected void reset() throws IOException { + in.reset(); } /** diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 5df8c85031cd..628c54cef4c8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -51,7 +51,7 @@ public IntervalIterator apply(List iterators) { private static class BlockIntervalIterator extends ConjunctionIntervalIterator { - int doc = -1, start = -1, end = -1; + int start = -1, end = -1; BlockIntervalIterator(List subIterators) { super(subIterators); @@ -59,26 +59,16 @@ private static class BlockIntervalIterator extends ConjunctionIntervalIterator { @Override public int start() { - if (doc != approximation.docID()) { - return -1; - } return start; } @Override public int end() { - if (doc != approximation.docID()) { - return -1; - } return end; } @Override public int nextInterval() throws IOException { - if (doc != approximation.docID()) { - doc = approximation.docID(); - start = end = -1; - } if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) return NO_MORE_INTERVALS; int i = 1; @@ -100,6 +90,11 @@ public int nextInterval() throws IOException { end = subIterators.get(subIterators.size() - 1).end(); return start; } + + @Override + protected void reset() { + start = end = -1; + } } /** @@ -114,7 +109,7 @@ public IntervalIterator apply(List intervalIterators) { private static 
class OrderedIntervalIterator extends ConjunctionIntervalIterator { - int doc = -1, start = -1, end = -1, i; + int start = -1, end = -1, i; private OrderedIntervalIterator(List subIntervals) { super(subIntervals); @@ -122,30 +117,19 @@ private OrderedIntervalIterator(List subIntervals) { @Override public int start() { - if (doc != approximation.docID()) { - return -1; - } return start; } @Override public int end() { - if (doc != approximation.docID()) { - return -1; - } return end; } @Override public int nextInterval() throws IOException { - if (doc != approximation.docID()) { - doc = approximation.docID(); - subIterators.get(0).nextInterval(); - i = 1; - start = end = -1; - } start = end = NO_MORE_INTERVALS; int b = Integer.MAX_VALUE; + i = 1; while (true) { while (true) { if (subIterators.get(i - 1).end() >= b) @@ -167,6 +151,13 @@ public int nextInterval() throws IOException { return start; } } + + @Override + protected void reset() throws IOException { + subIterators.get(0).nextInterval(); + i = 1; + start = end = -1; + } } /** @@ -184,7 +175,7 @@ private static class UnorderedIntervalIterator extends ConjunctionIntervalIterat private final PriorityQueue queue; private final IntervalIterator[] subIterators; - int doc = -1, start = -1, end = -1, queueEnd; + int start = -1, end = -1, queueEnd; UnorderedIntervalIterator(List subIterators) { super(subIterators); @@ -203,17 +194,11 @@ protected boolean lessThan(IntervalIterator a, IntervalIterator b) { @Override public int start() { - if (doc != approximation.docID()) { - return -1; - } return start; } @Override public int end() { - if (doc != approximation.docID()) { - return -1; - } return end; } @@ -226,16 +211,6 @@ void updateRightExtreme(IntervalIterator it) { @Override public int nextInterval() throws IOException { - if (doc != approximation.docID()) { - doc = approximation.docID(); - this.queue.clear(); - this.queueEnd = start = end = -1; - for (IntervalIterator it : subIterators) { - it.nextInterval(); - 
queue.add(it); - updateRightExtreme(it); - } - } while (this.queue.size() == subIterators.length && queue.top().start() == start) { IntervalIterator it = queue.pop(); if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { @@ -259,6 +234,17 @@ public int nextInterval() throws IOException { return start; } + @Override + protected void reset() throws IOException { + queueEnd = start = end = -1; + this.queue.clear(); + for (IntervalIterator it : subIterators) { + it.nextInterval(); + queue.add(it); + updateRightExtreme(it); + } + } + } /** @@ -274,7 +260,6 @@ public IntervalIterator apply(List iterators) { return new ConjunctionIntervalIterator(iterators) { boolean bpos; - int doc = -1; @Override public int start() { @@ -288,10 +273,6 @@ public int end() { @Override public int nextInterval() throws IOException { - if (doc != approximation.docID()) { - doc = approximation.docID(); - bpos = true; - } if (bpos == false) return NO_MORE_INTERVALS; while (a.nextInterval() != NO_MORE_INTERVALS) { @@ -304,6 +285,11 @@ public int nextInterval() throws IOException { } return NO_MORE_INTERVALS; } + + @Override + protected void reset() throws IOException { + bpos = true; + } }; } }; @@ -321,7 +307,6 @@ public IntervalIterator apply(List iterators) { return new ConjunctionIntervalIterator(iterators) { boolean bpos; - int doc = -1; @Override public int start() { @@ -335,10 +320,6 @@ public int end() { @Override public int nextInterval() throws IOException { - if (doc != approximation.docID()) { - doc = approximation.docID(); - bpos = true; - } if (bpos == false) return NO_MORE_INTERVALS; while (a.nextInterval() != NO_MORE_INTERVALS) { @@ -351,6 +332,11 @@ public int nextInterval() throws IOException { } return NO_MORE_INTERVALS; } + + @Override + protected void reset() throws IOException { + bpos = true; + } }; } }; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index 
7dea05990ae3..80451403237c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -23,36 +23,61 @@ * Defines methods to iterate over the intervals that a term, phrase or more * complex positional query matches on a document * - * The iterator is advanced by calling {@link DocIdSetIterator#advance(int)} on the - * DocIdSetIterator returned by {@link #approximation()}. Consumers should then call - * {@link #nextInterval()} to retrieve intervals until {@link #NO_MORE_INTERVALS} is returned. + * The iterator is advanced by calling {@link #advance(int)} or {@link #nextDoc()}. + * Consumers should then call {@link #nextInterval()} to retrieve intervals until + * {@link #NO_MORE_INTERVALS} is returned. */ -public interface IntervalIterator { +public abstract class IntervalIterator extends DocIdSetIterator { + + protected final DocIdSetIterator approximation; + + protected IntervalIterator(DocIdSetIterator approximation) { + this.approximation = approximation; + } /** * When returned from {@link #nextInterval()}, indicates that there are no more * matching intervals on the current document */ - int NO_MORE_INTERVALS = Integer.MAX_VALUE; + public static final int NO_MORE_INTERVALS = Integer.MAX_VALUE; - /** - * An iterator over documents that might have matching intervals - */ - DocIdSetIterator approximation(); + @Override + public final int docID() { + return approximation.docID(); + } + + @Override + public final int nextDoc() throws IOException { + int doc = approximation.nextDoc(); + reset(); + return doc; + } + + @Override + public final int advance(int target) throws IOException { + int doc = approximation.advance(target); + reset(); + return doc; + } + + @Override + public final long cost() { + return approximation.cost(); + } /** * The start of the current interval * * Returns -1 if {@link #nextInterval()} has not yet been called */ - int start(); + public 
abstract int start(); /** * The end of the current interval * * Returns -1 if {@link #nextInterval()} has not yet been called */ - int end(); + public abstract int end(); /** * Advance the iterator to the next interval @@ -60,13 +85,23 @@ public interface IntervalIterator { * @return the starting interval of the next interval, or {@link IntervalIterator#NO_MORE_INTERVALS} if * there are no more intervals on the current document */ - int nextInterval() throws IOException; + public abstract int nextInterval() throws IOException; /** * An indication of the average cost of iterating over all intervals in a document * * @see TwoPhaseIterator#matchCost() */ - float cost(); + public abstract float matchCost(); + + /** + * Called when the underlying iterator has been advanced. + */ + protected abstract void reset() throws IOException; + + @Override + public String toString() { + return approximation.docID() + ":[" + start() + "->" + end() + "]"; + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index 279265f3dbff..279d3e00885c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -22,7 +22,6 @@ class IntervalScorer extends Scorer { private final IntervalIterator intervals; - private final DocIdSetIterator approximation; private final LeafSimScorer simScorer; private float freq = -1; @@ -31,13 +30,12 @@ class IntervalScorer extends Scorer { protected IntervalScorer(Weight weight, IntervalIterator intervals, LeafSimScorer simScorer) { super(weight); this.intervals = intervals; - this.approximation = intervals.approximation(); this.simScorer = simScorer; } @Override public int docID() { - return approximation.docID(); + return intervals.docID(); } @Override @@ -78,7 +76,7 @@ public DocIdSetIterator iterator() { @Override public TwoPhaseIterator twoPhaseIterator() { - return 
new TwoPhaseIterator(approximation) { + return new TwoPhaseIterator(intervals) { @Override public boolean matches() throws IOException { return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java index a9eb01e3937c..986b0f7fc479 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -48,37 +48,22 @@ public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IO te.seekExact(term); PostingsEnum pe = te.postings(null, PostingsEnum.POSITIONS); float cost = PhraseQuery.termPositionsCost(te); - return new IntervalIterator() { + return new IntervalIterator(pe) { - int doc = -1, pos = -1, upto; - - @Override - public DocIdSetIterator approximation() { - return pe; - } + int pos = -1, upto; @Override public int start() { - if (doc != pe.docID()) { - return -1; - } return pos; } @Override public int end() { - if (doc != pe.docID()) { - return -1; - } return pos; } @Override public int nextInterval() throws IOException { - if (doc != pe.docID()) { - doc = pe.docID(); - upto = pe.freq(); - } if (upto <= 0) return pos = NO_MORE_INTERVALS; upto--; @@ -86,10 +71,22 @@ public int nextInterval() throws IOException { } @Override - public float cost() { + public float matchCost() { return cost; } + @Override + protected void reset() throws IOException { + if (pe.docID() == NO_MORE_DOCS) { + upto = -1; + pos = NO_MORE_INTERVALS; + } + else { + upto = pe.freq(); + pos = -1; + } + } + @Override public String toString() { return pe.docID() + ":" + pos; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index ec28cef73b2d..0e7f7ce604c5 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -97,13 +97,13 @@ private void checkIntervals(IntervalsSource source, String field, int expectedMa for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { ids.advance(doc); int id = (int) ids.longValue(); - if (intervals.approximation().docID() == doc || - (intervals.approximation().docID() < doc && intervals.approximation().advance(doc) == doc)) { + if (intervals.docID() == doc || + (intervals.docID() < doc && intervals.advance(doc) == doc)) { int i = 0, pos; assertEquals(-1, intervals.start()); assertEquals(-1, intervals.end()); while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) { - //System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); + //System.out.println(doc + ": " + intervals); assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); assertEquals(expected[id][i + 1], intervals.end()); From 562d3f7c769600856fcfe67459c0ca00fba480c5 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 12 Mar 2018 09:09:11 +0000 Subject: [PATCH 35/83] Javadocs --- .../apache/lucene/search/IntervalIterator.java | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index 80451403237c..1064ba062fa9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -20,12 +20,19 @@ import java.io.IOException; /** - * Defines methods to iterate over the intervals that a term, phrase or more - * complex positional query matches on a document + * A {@link DocIdSetIterator} than also allows iteration over matching + * intervals in a document. 
* - * The iterator is advanced by calling {@link #advance(int)} or {@link #nextDoc()}. - * Consumers should then call {@link #nextInterval()} to retrieve intervals until - * {@link #NO_MORE_INTERVALS} is returned. + * Once the iterator is positioned on a document by calling {@link #advance(int)} + * or {@link #nextDoc()}, intervals may be retrieved by calling {@link #nextInterval()} + * until {@link #NO_MORE_INTERVALS} is returned. + * + * The limits of the current interval are returned by {@link #start()} and {@link #end()}. + * When the iterator has been moved to a new document, but before {@link #nextInterval()} + * has been called, both these methods return {@code -1}. + * + * Note that it is possible for a document to return {@link #NO_MORE_INTERVALS} + * on the first call to {@link #nextInterval()} */ public abstract class IntervalIterator extends DocIdSetIterator { From 20be02d331d9cef0f275d8651a4543f3051f5ae8 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 12 Mar 2018 09:10:40 +0000 Subject: [PATCH 36/83] javadocs --- .../org/apache/lucene/search/DisjunctionIntervalsSource.java | 5 ----- .../src/java/org/apache/lucene/search/IntervalIterator.java | 4 ++-- .../src/java/org/apache/lucene/search/IntervalQuery.java | 3 +++ .../src/java/org/apache/lucene/search/IntervalsSource.java | 3 +++ .../java/org/apache/lucene/search/TermIntervalsSource.java | 2 +- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java index 2f7b01e88a8c..555f95b83a63 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -160,11 +160,6 @@ public int nextInterval() throws IOException { return current.start(); } - @Override - public String toString() { - return approximation.docID() + ":[" + start() 
+ "->" + end() + "]"; - } - private boolean contains(IntervalIterator it, int start, int end) { return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index 1064ba062fa9..31dbffe86d46 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -20,7 +20,7 @@ import java.io.IOException; /** - * A {@link DocIdSetIterator} than also allows iteration over matching + * A {@link DocIdSetIterator} that also allows iteration over matching * intervals in a document. * * Once the iterator is positioned on a document by calling {@link #advance(int)} @@ -89,7 +89,7 @@ public final long cost() { /** * Advance the iterator to the next interval * - * @return the starting interval of the next interval, or {@link IntervalIterator#NO_MORE_INTERVALS} if + * @return the start of the next interval, or {@link IntervalIterator#NO_MORE_INTERVALS} if * there are no more intervals on the current document */ public abstract int nextInterval() throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index 83a81b062a24..aa843fc06d7c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -34,6 +34,9 @@ /** * A query that retrieves documents containing intervals returned from an * {@link IntervalsSource} + * + * Static constructor functions for various different sources can be found in the + * {@link Intervals} class */ public final class IntervalQuery extends Query { diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java 
index e1d2fe1598cb..fb923ee5fade 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java @@ -27,6 +27,9 @@ /** * A helper class for {@link IntervalQuery} that provides an {@link IntervalIterator} * for a given field and segment + * + * Static constructor functions for various different sources can be found in the + * {@link Intervals} class */ public abstract class IntervalsSource { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java index 986b0f7fc479..b7d7e10afaf3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -89,7 +89,7 @@ protected void reset() throws IOException { @Override public String toString() { - return pe.docID() + ":" + pos; + return term.utf8ToString() + ":" + super.toString(); } }; } From d68b3d903b07ecaeae0f09a6d3ea632e3ee322c3 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 14 Mar 2018 14:55:03 +0000 Subject: [PATCH 37/83] Move intervals to sandbox --- .../org/apache/lucene/search/DisiWrapper.java | 18 +- .../ConjunctionIntervalIterator.java | 6 +- .../ConjunctionIntervalsSource.java | 2 +- .../DifferenceIntervalFunction.java | 2 +- .../intervals}/DifferenceIntervalsSource.java | 2 +- .../lucene/intervals/DisiPriorityQueue.java | 171 ++++++++++++++++++ .../apache/lucene/intervals/DisiWrapper.java | 49 +++++ .../DisjunctionDISIApproximation.java | 76 ++++++++ .../DisjunctionIntervalsSource.java | 7 +- .../lucene/intervals}/IntervalFilter.java | 2 +- .../lucene/intervals}/IntervalFunction.java | 47 +++-- .../lucene/intervals}/IntervalIterator.java | 5 +- .../lucene/intervals}/IntervalQuery.java | 16 +- .../lucene/intervals}/IntervalScorer.java | 9 +- .../apache/lucene/intervals}/Intervals.java | 7 +- 
.../lucene/intervals}/IntervalsSource.java | 3 +- .../intervals}/LowpassIntervalsSource.java | 2 +- .../intervals}/TermIntervalsSource.java | 45 ++++- .../lucene/intervals}/TestIntervalQuery.java | 6 +- .../lucene/intervals}/TestIntervals.java | 11 +- 20 files changed, 408 insertions(+), 78 deletions(-) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/ConjunctionIntervalIterator.java (93%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/ConjunctionIntervalsSource.java (98%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/DifferenceIntervalFunction.java (99%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/DifferenceIntervalsSource.java (98%) create mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/DisjunctionIntervalsSource.java (96%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/IntervalFilter.java (98%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/IntervalFunction.java (85%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/IntervalIterator.java (96%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/IntervalQuery.java (90%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/IntervalScorer.java (89%) rename lucene/{core/src/java/org/apache/lucene/search 
=> sandbox/src/java/org/apache/lucene/intervals}/Intervals.java (97%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/IntervalsSource.java (97%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/LowpassIntervalsSource.java (98%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/TermIntervalsSource.java (60%) rename lucene/{core/src/test/org/apache/lucene/search => sandbox/src/test/org/apache/lucene/intervals}/TestIntervalQuery.java (97%) rename lucene/{core/src/test/org/apache/lucene/search => sandbox/src/test/org/apache/lucene/intervals}/TestIntervals.java (95%) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index edca0e3a26a6..5fa01d1c3e4c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -46,14 +46,9 @@ public class DisiWrapper { public int lastApproxMatchDoc; // last doc of approximation that did match public int lastApproxNonMatchDoc; // last doc of approximation that did not match - // For IntervalIterators - // TODO clean this up! 
- public final IntervalIterator intervals; - public DisiWrapper(Scorer scorer) { this.scorer = scorer; this.spans = null; - this.intervals = null; this.iterator = scorer.iterator(); this.cost = iterator.cost(); this.doc = -1; @@ -71,7 +66,6 @@ public DisiWrapper(Scorer scorer) { public DisiWrapper(Spans spans) { this.scorer = null; this.spans = spans; - this.intervals = null; this.iterator = spans; this.cost = iterator.cost(); this.doc = -1; @@ -88,16 +82,6 @@ public DisiWrapper(Spans spans) { this.lastApproxMatchDoc = -2; } - public DisiWrapper(IntervalIterator iterator) { - this.scorer = null; - this.spans = null; - this.intervals = iterator; - this.iterator = iterator; - this.cost = iterator.cost(); - this.doc = -1; - this.twoPhaseView = null; - this.approximation = iterator; - this.matchCost = iterator.matchCost(); - } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalIterator.java similarity index 93% rename from lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalIterator.java index cfda0de03f50..3fd9daed2de9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalIterator.java @@ -15,12 +15,12 @@ * limitations under the License. 
*/ -package org.apache.lucene.search; +package org.apache.lucene.intervals; -import java.io.IOException; -import java.util.ArrayList; import java.util.List; +import org.apache.lucene.search.ConjunctionDISI; + abstract class ConjunctionIntervalIterator extends IntervalIterator { final List subIterators; diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalsSource.java similarity index 98% rename from lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalsSource.java index 2ee0422786c9..30874d96d5e6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalsSource.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; import java.util.ArrayList; diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalFunction.java similarity index 99% rename from lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalFunction.java index bc89bfed70b3..10c697775abf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalFunction.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; import java.util.Objects; diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalsSource.java similarity index 98% rename from lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalsSource.java index d26217729026..03ebdd5134cb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalsSource.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; import java.util.Objects; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java new file mode 100644 index 000000000000..4f1e5c9511b1 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.intervals; + + +import java.util.Arrays; +import java.util.Iterator; + +import org.apache.lucene.util.PriorityQueue; + +/** + * A priority queue of DocIdSetIterators that orders by current doc ID. + * This specialization is needed over {@link PriorityQueue} because the + * pluggable comparison function makes the rebalancing quite slow. + * @lucene.internal + */ +public final class DisiPriorityQueue implements Iterable { + + static int leftNode(int node) { + return ((node + 1) << 1) - 1; + } + + static int rightNode(int leftNode) { + return leftNode + 1; + } + + static int parentNode(int node) { + return ((node + 1) >>> 1) - 1; + } + + private final DisiWrapper[] heap; + private int size; + + public DisiPriorityQueue(int maxSize) { + heap = new DisiWrapper[maxSize]; + size = 0; + } + + public int size() { + return size; + } + + public DisiWrapper top() { + return heap[0]; + } + + /** Get the list of scorers which are on the current doc. 
*/ + public DisiWrapper topList() { + final DisiWrapper[] heap = this.heap; + final int size = this.size; + DisiWrapper list = heap[0]; + list.next = null; + if (size >= 3) { + list = topList(list, heap, size, 1); + list = topList(list, heap, size, 2); + } else if (size == 2 && heap[1].doc == list.doc) { + list = prepend(heap[1], list); + } + return list; + } + + // prepend w1 (iterator) to w2 (list) + private DisiWrapper prepend(DisiWrapper w1, DisiWrapper w2) { + w1.next = w2; + return w1; + } + + private DisiWrapper topList(DisiWrapper list, DisiWrapper[] heap, + int size, int i) { + final DisiWrapper w = heap[i]; + if (w.doc == list.doc) { + list = prepend(w, list); + final int left = leftNode(i); + final int right = left + 1; + if (right < size) { + list = topList(list, heap, size, left); + list = topList(list, heap, size, right); + } else if (left < size && heap[left].doc == list.doc) { + list = prepend(heap[left], list); + } + } + return list; + } + + public DisiWrapper add(DisiWrapper entry) { + final DisiWrapper[] heap = this.heap; + final int size = this.size; + heap[size] = entry; + upHeap(size); + this.size = size + 1; + return heap[0]; + } + + public DisiWrapper pop() { + final DisiWrapper[] heap = this.heap; + final DisiWrapper result = heap[0]; + final int i = --size; + heap[0] = heap[i]; + heap[i] = null; + downHeap(i); + return result; + } + + public DisiWrapper updateTop() { + downHeap(size); + return heap[0]; + } + + DisiWrapper updateTop(DisiWrapper topReplacement) { + heap[0] = topReplacement; + return updateTop(); + } + + void upHeap(int i) { + final DisiWrapper node = heap[i]; + final int nodeDoc = node.doc; + int j = parentNode(i); + while (j >= 0 && nodeDoc < heap[j].doc) { + heap[i] = heap[j]; + i = j; + j = parentNode(j); + } + heap[i] = node; + } + + void downHeap(int size) { + int i = 0; + final DisiWrapper node = heap[0]; + int j = leftNode(i); + if (j < size) { + int k = rightNode(j); + if (k < size && heap[k].doc < heap[j].doc) { + j 
= k; + } + if (heap[j].doc < node.doc) { + do { + heap[i] = heap[j]; + i = j; + j = leftNode(i); + k = rightNode(j); + if (k < size && heap[k].doc < heap[j].doc) { + j = k; + } + } while (j < size && heap[j].doc < node.doc); + heap[i] = node; + } + } + } + + @Override + public Iterator iterator() { + return Arrays.asList(heap).subList(0, size).iterator(); + } + +} + + diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java new file mode 100644 index 000000000000..78c2cc215546 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.intervals; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.TwoPhaseIterator; + +public class DisiWrapper { + + public final DocIdSetIterator iterator; + public final IntervalIterator intervals; + public final long cost; + public final float matchCost; // the match cost for two-phase iterators, 0 otherwise + public int doc; // the current doc, used for comparison + public DisiWrapper next; // reference to a next element, see #topList + + // An approximation of the iterator, or the iterator itself if it does not + // support two-phase iteration + public final DocIdSetIterator approximation; + // A two-phase view of the iterator, or null if the iterator does not support + // two-phase iteration + public final TwoPhaseIterator twoPhaseView; + + public DisiWrapper(IntervalIterator iterator) { + this.intervals = iterator; + this.iterator = iterator; + this.cost = iterator.cost(); + this.doc = -1; + this.twoPhaseView = null; + this.approximation = iterator; + this.matchCost = iterator.matchCost(); + } + +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java new file mode 100644 index 000000000000..30ab9d4bc676 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.intervals; + +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; + +/** + * A {@link DocIdSetIterator} which is a disjunction of the approximations of + * the provided iterators. + * @lucene.internal + */ +class DisjunctionDISIApproximation extends DocIdSetIterator { + + final DisiPriorityQueue subIterators; + final long cost; + + public DisjunctionDISIApproximation(DisiPriorityQueue subIterators) { + this.subIterators = subIterators; + long cost = 0; + for (DisiWrapper w : subIterators) { + cost += w.cost; + } + this.cost = cost; + } + + @Override + public long cost() { + return cost; + } + + @Override + public int docID() { + return subIterators.top().doc; + } + + @Override + public int nextDoc() throws IOException { + DisiWrapper top = subIterators.top(); + final int doc = top.doc; + do { + top.doc = top.approximation.nextDoc(); + top = subIterators.updateTop(); + } while (top.doc == doc); + + return top.doc; + } + + @Override + public int advance(int target) throws IOException { + DisiWrapper top = subIterators.top(); + do { + top.doc = top.approximation.advance(target); + top = subIterators.updateTop(); + } while (top.doc < target); + + return top.doc; + } +} + + diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionIntervalsSource.java similarity index 96% rename from lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java rename to 
lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionIntervalsSource.java index 555f95b83a63..f1b2381b3694 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionIntervalsSource.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; import java.util.ArrayList; @@ -26,6 +26,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.PriorityQueue; class DisjunctionIntervalsSource extends IntervalsSource { @@ -148,13 +149,13 @@ public int nextInterval() throws IOException { int start = current.start(), end = current.end(); while (intervalQueue.size() > 0 && contains(intervalQueue.top(), start, end)) { IntervalIterator it = intervalQueue.pop(); - if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { + if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { intervalQueue.add(it); } } if (intervalQueue.size() == 0) { current = EMPTY; - return IntervalIterator.NO_MORE_INTERVALS; + return NO_MORE_INTERVALS; } current = intervalQueue.top(); return current.start(); diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFilter.java similarity index 98% rename from lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFilter.java index 3add87d78243..47fea70f312f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFilter.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFunction.java similarity index 85% rename from lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFunction.java index 628c54cef4c8..2299f152dabe 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFunction.java @@ -15,11 +15,10 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; import java.util.List; -import java.util.Objects; import org.apache.lucene.util.PriorityQueue; @@ -69,20 +68,20 @@ public int end() { @Override public int nextInterval() throws IOException { - if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) - return NO_MORE_INTERVALS; + if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) + return IntervalIterator.NO_MORE_INTERVALS; int i = 1; while (i < subIterators.size()) { while (subIterators.get(i).start() <= subIterators.get(i - 1).end()) { - if (subIterators.get(i).nextInterval() == NO_MORE_INTERVALS) - return NO_MORE_INTERVALS; + if (subIterators.get(i).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) + return IntervalIterator.NO_MORE_INTERVALS; } if (subIterators.get(i).start() == subIterators.get(i - 1).end() + 1) { i = i + 1; } else { - if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) - return NO_MORE_INTERVALS; + if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) + return IntervalIterator.NO_MORE_INTERVALS; i = 1; } } @@ -127,7 +126,7 @@ public int end() { @Override public int nextInterval() throws IOException { - start = end = NO_MORE_INTERVALS; + start = 
end = IntervalIterator.NO_MORE_INTERVALS; int b = Integer.MAX_VALUE; i = 1; while (true) { @@ -137,7 +136,7 @@ public int nextInterval() throws IOException { if (i == subIterators.size() || subIterators.get(i).start() > subIterators.get(i - 1).end()) break; do { - if (subIterators.get(i).end() >= b || subIterators.get(i).nextInterval() == NO_MORE_INTERVALS) + if (subIterators.get(i).end() >= b || subIterators.get(i).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) return start; } while (subIterators.get(i).start() <= subIterators.get(i - 1).end()); @@ -147,7 +146,7 @@ public int nextInterval() throws IOException { end = subIterators.get(subIterators.size() - 1).end(); b = subIterators.get(subIterators.size() - 1).start(); i = 1; - if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) + if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) return start; } } @@ -213,20 +212,20 @@ void updateRightExtreme(IntervalIterator it) { public int nextInterval() throws IOException { while (this.queue.size() == subIterators.length && queue.top().start() == start) { IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { + if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { queue.add(it); updateRightExtreme(it); } } if (this.queue.size() < subIterators.length) - return NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; do { start = queue.top().start(); end = queueEnd; if (queue.top().end() == end) return start; IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { + if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { queue.add(it); updateRightExtreme(it); } @@ -274,16 +273,16 @@ public int end() { @Override public int nextInterval() throws IOException { if (bpos == false) - return NO_MORE_INTERVALS; - while (a.nextInterval() != NO_MORE_INTERVALS) { + return IntervalIterator.NO_MORE_INTERVALS; + while 
(a.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { while (b.start() < a.start() && b.end() < a.end()) { - if (b.nextInterval() == NO_MORE_INTERVALS) - return NO_MORE_INTERVALS; + if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) + return IntervalIterator.NO_MORE_INTERVALS; } if (a.start() <= b.start() && a.end() >= b.end()) return a.start(); } - return NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; } @Override @@ -321,16 +320,16 @@ public int end() { @Override public int nextInterval() throws IOException { if (bpos == false) - return NO_MORE_INTERVALS; - while (a.nextInterval() != NO_MORE_INTERVALS) { + return IntervalIterator.NO_MORE_INTERVALS; + while (a.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { while (b.end() < a.end()) { - if (b.nextInterval() == NO_MORE_INTERVALS) - return NO_MORE_INTERVALS; + if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) + return IntervalIterator.NO_MORE_INTERVALS; } if (b.start() <= a.start()) return a.start(); } - return NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalIterator.java similarity index 96% rename from lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalIterator.java index 31dbffe86d46..f6a5f89efc12 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalIterator.java @@ -15,10 +15,13 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.TwoPhaseIterator; + /** * A {@link DocIdSetIterator} that also allows iteration over matching * intervals in a document. 
diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalQuery.java similarity index 90% rename from lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalQuery.java index aa843fc06d7c..4d2c22f97a35 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalQuery.java @@ -15,20 +15,26 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; -import java.util.List; import java.util.Objects; import java.util.Set; -import java.util.stream.Collectors; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermStates; +import org.apache.lucene.search.CollectionStatistics; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LeafSimScorer; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TermStatistics; +import org.apache.lucene.search.Weight; import org.apache.lucene.search.similarities.Similarity; /** @@ -74,7 +80,7 @@ private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, float boost) TermStatistics[] termStats = new TermStatistics[terms.size()]; int termUpTo = 0; for (Term term : terms) { - TermStatistics termStatistics = searcher.termStatistics(term, TermStates.build(searcher.readerContext, term, true)); + TermStatistics termStatistics = searcher.termStatistics(term, TermStates.build(searcher.getTopReaderContext(), term, true)); if (termStatistics != null) { termStats[termUpTo++] = termStatistics; } diff 
--git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalScorer.java similarity index 89% rename from lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalScorer.java index 279d3e00885c..a28eddcf16fd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalScorer.java @@ -15,10 +15,17 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.LeafSimScorer; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; + class IntervalScorer extends Scorer { private final IntervalIterator intervals; diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/Intervals.java similarity index 97% rename from lucene/core/src/java/org/apache/lucene/search/Intervals.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/Intervals.java index aa29fa2f4b99..7b95e4cf0f7b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/Intervals.java @@ -15,15 +15,10 @@ * limitations under the License. 
*/ -package org.apache.lucene.search; +package org.apache.lucene.intervals; -import java.io.IOException; import java.util.Arrays; -import java.util.Objects; -import java.util.Set; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; import org.apache.lucene.util.BytesRef; /** diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalsSource.java similarity index 97% rename from lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalsSource.java index fb923ee5fade..405423bec334 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalsSource.java @@ -15,10 +15,9 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; -import java.util.Objects; import java.util.Set; import org.apache.lucene.index.LeafReaderContext; diff --git a/lucene/core/src/java/org/apache/lucene/search/LowpassIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/LowpassIntervalsSource.java similarity index 98% rename from lucene/core/src/java/org/apache/lucene/search/LowpassIntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/LowpassIntervalsSource.java index 39f24fbfb670..82f4acf785be 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LowpassIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/LowpassIntervalsSource.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; import java.util.Objects; diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/TermIntervalsSource.java similarity index 60% rename from lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/TermIntervalsSource.java index b7d7e10afaf3..00ed08984720 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/TermIntervalsSource.java @@ -15,17 +15,23 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; import java.util.Objects; import java.util.Set; +import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat; +import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.util.BytesRef; class TermIntervalsSource extends IntervalsSource { @@ -47,7 +53,7 @@ public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IO TermsEnum te = terms.iterator(); te.seekExact(term); PostingsEnum pe = te.postings(null, PostingsEnum.POSITIONS); - float cost = PhraseQuery.termPositionsCost(te); + float cost = termPositionsCost(te); return new IntervalIterator(pe) { int pos = -1, upto; @@ -116,4 +122,39 @@ public String toString() { public void extractTerms(String field, Set terms) { terms.add(new Term(field, 
term)); } + + /** A guess of + * the average number of simple operations for the initial seek and buffer refill + * per document for the positions of a term. + * See also {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}. + *

    + * Aside: Instead of being constant this could depend among others on + * {@link Lucene50PostingsFormat#BLOCK_SIZE}, + * {@link TermsEnum#docFreq()}, + * {@link TermsEnum#totalTermFreq()}, + * {@link DocIdSetIterator#cost()} (expected number of matching docs), + * {@link LeafReader#maxDoc()} (total number of docs in the segment), + * and the seek time and block size of the device storing the index. + */ + private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128; + + /** Number of simple operations in {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()} + * when no seek or buffer refill is done. + */ + private static final int TERM_OPS_PER_POS = 7; + + /** Returns an expected cost in simple operations + * of processing the occurrences of a term + * in a document that contains the term. + * This is for use by {@link TwoPhaseIterator#matchCost} implementations. + * @param termsEnum The term is the term at which this TermsEnum is positioned. + */ + static float termPositionsCost(TermsEnum termsEnum) throws IOException { + // TODO: When intervals move to core, refactor to use the copy of this in PhraseQuery + int docFreq = termsEnum.docFreq(); + assert docFreq > 0; + long totalTermFreq = termsEnum.totalTermFreq(); + float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq; + return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS; + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervalQuery.java similarity index 97% rename from lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java rename to lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervalQuery.java index 6c1ba8ff6c1b..489603f67fc0 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervalQuery.java @@ -15,7 +15,7 @@ * limitations under the 
License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; @@ -24,7 +24,9 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervals.java similarity index 95% rename from lucene/core/src/test/org/apache/lucene/search/TestIntervals.java rename to lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervals.java index 0e7f7ce604c5..182299ae9443 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervals.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; @@ -31,10 +31,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.spans.SpanNearQuery; -import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.Directory; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; @@ -88,7 +85,7 @@ public static void teardownIndex() throws IOException { private void checkIntervals(IntervalsSource source, String field, int expectedMatchCount, int[][] expected) throws IOException { int matchedDocs = 0; - for (LeafReaderContext ctx : searcher.leafContexts) { + for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) { assertNull(source.intervals(field + "fake", ctx)); NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); IntervalIterator intervals = source.intervals(field, ctx); @@ -123,7 +120,7 @@ private void checkIntervals(IntervalsSource source, String field, int expectedMa public void testIntervalsOnFieldWithNoPositions() throws IOException { IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> { - Intervals.term("wibble").intervals("id", searcher.leafContexts.get(0)); + Intervals.term("wibble").intervals("id", searcher.getIndexReader().leaves().get(0)); }); assertEquals("Cannot create an IntervalIterator over field id because it has no indexed positions", e.getMessage()); } From d3f6e3bafc4af11e2aa365537a2b7a47fd01b5da Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 18 Dec 2017 14:42:48 +0000 Subject: [PATCH 38/83] WIP: terms and ordered near --- .../apache/lucene/search/ConjunctionDISI.java | 15 ++ .../apache/lucene/search/FilterWeight.java | 5 + 
.../apache/lucene/search/IntervalFilter.java | 66 ++++++ .../lucene/search/IntervalFunction.java | 70 +++++++ .../lucene/search/IntervalIterator.java | 74 +++++++ .../apache/lucene/search/IntervalQuery.java | 149 ++++++++++++++ .../apache/lucene/search/IntervalScorer.java | 76 +++++++ .../org/apache/lucene/search/Intervals.java | 191 ++++++++++++++++++ .../apache/lucene/search/LRUQueryCache.java | 5 + .../java/org/apache/lucene/search/Scorer.java | 1 + .../org/apache/lucene/search/TermQuery.java | 15 ++ .../org/apache/lucene/search/TermScorer.java | 1 + .../java/org/apache/lucene/search/Weight.java | 4 + .../lucene/search/TestIntervalQuery.java | 104 ++++++++++ .../apache/lucene/search/TestIntervals.java | 152 ++++++++++++++ 15 files changed, 928 insertions(+) create mode 100644 lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/Intervals.java create mode 100644 lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java create mode 100644 lucene/core/src/test/org/apache/lucene/search/TestIntervals.java diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java index 780e854033a8..a5eabcc0266a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java @@ -71,6 +71,21 @@ public static DocIdSetIterator intersectIterators(List iterato return createConjunction(allIterators, twoPhaseIterators); } + public static DocIdSetIterator intersectIntervals(List iterators) { 
+ if (iterators.size() < 2) { + throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators"); + } + final List allIterators = new ArrayList<>(); + final List twoPhaseIterators = new ArrayList<>(); + for (IntervalIterator iterator : iterators) { + if (iterator == null) + return DocIdSetIterator.empty(); + addIterator(iterator.approximation(), allIterators, twoPhaseIterators); + } + + return createConjunction(allIterators, twoPhaseIterators); + } + /** Create a conjunction over the provided {@link Spans}. Note that the * returned {@link DocIdSetIterator} might leverage two-phase iteration in * which case it is possible to retrieve the {@link TwoPhaseIterator} using diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java index 925c9534f898..bdb9108bcc42 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java @@ -60,6 +60,11 @@ public boolean isCacheable(LeafReaderContext ctx) { return in.isCacheable(ctx); } + @Override + public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { + return in.intervals(context, field); + } + @Override public void extractTerms(Set terms) { in.extractTerms(terms); diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java new file mode 100644 index 000000000000..4b0812120db5 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; + +public abstract class IntervalFilter implements IntervalIterator { + + private final IntervalIterator in; + + public IntervalFilter(IntervalIterator in) { + this.in = in; + } + + protected abstract boolean accept(); + + @Override + public final int nextInterval() throws IOException { + int next; + do { + next = in.nextInterval(); + } + while (accept() == false && next != Intervals.NO_MORE_INTERVALS); + return next; + } + + @Override + public final int start() { + return in.start(); + } + + @Override + public final int end() { + return in.end(); + } + + @Override + public int innerWidth() { + return in.innerWidth(); + } + + @Override + public DocIdSetIterator approximation() { + return in.approximation(); + } + + @Override + public void advanceTo(int doc) throws IOException { + in.advanceTo(doc); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java new file mode 100644 index 000000000000..0db038febea1 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.util.List; +import java.util.Objects; +import java.util.function.Function; + +public abstract class IntervalFunction implements Function, IntervalIterator> { + + @Override + public abstract int hashCode(); + + @Override + public abstract boolean equals(Object obj); + + @Override + public abstract String toString(); + + public static class OrderedNearFunction extends IntervalFunction { + + public OrderedNearFunction(int minWidth, int maxWidth) { + this.minWidth = minWidth; + this.maxWidth = maxWidth; + } + + final int minWidth; + final int maxWidth; + + @Override + public IntervalIterator apply(List intervalIterators) { + return Intervals.innerWidthFilter(Intervals.orderedIntervalIterator(intervalIterators), minWidth, maxWidth); + } + + @Override + public String toString() { + return "ONEAR[" + minWidth + "/" + maxWidth + "]"; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + OrderedNearFunction that = (OrderedNearFunction) o; + return minWidth == that.minWidth && + maxWidth == that.maxWidth; + } + + @Override + public int hashCode() { + return Objects.hash(minWidth, maxWidth); + } + } + +} diff --git 
a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java new file mode 100644 index 000000000000..af4e0520a86e --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; + +import org.apache.lucene.index.PostingsEnum; + +public interface IntervalIterator { + + DocIdSetIterator approximation(); + + int start(); + + int end(); + + int innerWidth(); + + void advanceTo(int doc) throws IOException; + + int nextInterval() throws IOException; + + default float score() { + return (float) (1.0 / (1.0 + (end() - start()))); + } + + IntervalIterator EMPTY = new IntervalIterator() { + @Override + public DocIdSetIterator approximation() { + return DocIdSetIterator.empty(); + } + + @Override + public int start() { + return -1; + } + + @Override + public int end() { + return -1; + } + + @Override + public int innerWidth() { + return 0; + } + + @Override + public void advanceTo(int doc) throws IOException { + + } + + @Override + public int nextInterval() throws IOException { + return Intervals.NO_MORE_INTERVALS; + } + }; + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java new file mode 100644 index 000000000000..068912e93855 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.similarities.Similarity; + +public final class IntervalQuery extends Query { + + private final String field; + private final List subQueries; + private final IntervalFunction iteratorFunction; + + public static IntervalQuery orderedNearQuery(String field, int width, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(0, width)); + } + + protected IntervalQuery(String field, List subQueries, IntervalFunction iteratorFunction) { + this.field = field; + this.subQueries = subQueries; + this.iteratorFunction = iteratorFunction; + } + + public String getField() { + return field; + } + + @Override + public String toString(String field) { + return iteratorFunction.toString() + subQueries.stream().map(Object::toString) + .collect(Collectors.joining(",", "(", ")")); + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + List subWeights = new ArrayList<>(); + for (Query q : subQueries) { + subWeights.add(searcher.createWeight(q, ScoreMode.COMPLETE_NO_SCORES, boost)); + } + return new IntervalWeight(this, subWeights, buildSimScorer(searcher, subWeights), scoreMode); + } + + private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, List subWeights) { + // nocommit + return new Similarity.SimScorer(field) { + @Override + public float score(float freq, long norm) { + return 1; + } + }; + } + + @Override + public boolean equals(Object o) { + if (this == o) return 
true; + if (o == null || getClass() != o.getClass()) return false; + IntervalQuery that = (IntervalQuery) o; + return Objects.equals(field, that.field) && + Objects.equals(subQueries, that.subQueries) && + Objects.equals(iteratorFunction, that.iteratorFunction); + } + + @Override + public int hashCode() { + return Objects.hash(field, subQueries, iteratorFunction); + } + + private class IntervalWeight extends Weight { + + final List subWeights; + final Similarity.SimScorer simScorer; + final ScoreMode scoreMode; + + public IntervalWeight(Query query, List subWeights, Similarity.SimScorer simScorer, ScoreMode scoreMode) { + super(query); + this.subWeights = subWeights; + this.simScorer = simScorer; + this.scoreMode = scoreMode; + } + + @Override + public void extractTerms(Set terms) { + for (Weight w : subWeights) { + w.extractTerms(terms); + } + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + Scorer scorer = scorer(context); + if (scorer != null && scorer.iterator().advance(doc) == doc) { + return Explanation.match(scorer.score(), "Intervals match"); // nocommit improve this + } + return Explanation.noMatch("No matching intervals"); + } + + @Override + public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { + List subIntervals = new ArrayList<>(); + for (Weight w : subWeights) { + subIntervals.add(w.intervals(context, field)); + } + return IntervalQuery.this.iteratorFunction.apply(subIntervals); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + IntervalIterator intervals = intervals(context, field); + if (intervals == IntervalIterator.EMPTY) + return null; + LeafSimScorer leafScorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.POSITIVE_INFINITY); // nocommit + return new IntervalScorer(this, intervals, leafScorer); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + for 
(Weight w : subWeights) { + if (w.isCacheable(ctx) == false) + return false; + } + return true; + } + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java new file mode 100644 index 000000000000..24c7585d44cd --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; + +class IntervalScorer extends Scorer { + + private final IntervalIterator intervals; + private final DocIdSetIterator approximation; + private final LeafSimScorer simScorer; + + protected IntervalScorer(Weight weight, IntervalIterator intervals, LeafSimScorer simScorer) { + super(weight); + this.intervals = intervals; + this.approximation = intervals.approximation(); + this.simScorer = simScorer; + } + + @Override + public int docID() { + return approximation.docID(); + } + + @Override + public float score() throws IOException { + float freq = 0; + do { + freq += intervals.score(); + } while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS); + return simScorer.score(docID(), freq); + } + + @Override + public DocIdSetIterator iterator() { + return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); + } + + @Override + public TwoPhaseIterator twoPhaseIterator() { + return new TwoPhaseIterator(approximation) { + @Override + public boolean matches() throws IOException { + intervals.advanceTo(docID()); + return intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; + } + + @Override + public float matchCost() { + return 0; + } + }; + } + + @Override + public float getMaxScore(int upTo) throws IOException { + return Float.POSITIVE_INFINITY; + } + + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java new file mode 100644 index 000000000000..db67f0909673 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.index.PostingsEnum; + +public final class Intervals { + + public static final int NO_MORE_INTERVALS = Integer.MAX_VALUE; + + public static IntervalIterator widthFilter(IntervalIterator in, int minWidth, int maxWidth) { + return new IntervalFilter(in) { + @Override + protected boolean accept() { + int width = end() - start(); + return width >= minWidth && width <= maxWidth; + } + }; + } + + public static IntervalIterator innerWidthFilter(IntervalIterator in, int minWidth, int maxWidth) { + return new IntervalFilter(in) { + @Override + protected boolean accept() { + int width = innerWidth(); + return width >= minWidth && width <= maxWidth; + } + }; + } + + public static IntervalIterator termIterator(PostingsEnum pe) { + return new TermIntervalIterator(pe); + } + + private static class TermIntervalIterator implements IntervalIterator { + + public TermIntervalIterator(PostingsEnum pe) { + this.pe = pe; + } + + private final PostingsEnum pe; + + int upTo = -1; + int pos = -1; + + @Override + public DocIdSetIterator approximation() { + return pe; + } + + @Override + public int start() { + return pos; + } + + @Override + public int end() { + return pos; + } + + @Override + public int innerWidth() { + return 0; + } + + @Override + public void advanceTo(int doc) throws 
IOException { + pos = -1; + if (pe.docID() == doc || (pe.docID() < doc && pe.advance(doc) == doc)) { + upTo = pe.freq(); + } + else { + upTo = -1; + } + } + + @Override + public int nextInterval() throws IOException { + if (upTo <= 0) { + return pos = NO_MORE_INTERVALS; + } + upTo--; + return pos = pe.nextPosition(); + } + + @Override + public String toString() { + return pe.docID() + "[" + pos + "]"; + } + } + + public static IntervalIterator orderedIntervalIterator(List subIterators) { + for (IntervalIterator it : subIterators) { + if (it == IntervalIterator.EMPTY) + return IntervalIterator.EMPTY; + } + return new OrderedIntervalIterator(subIterators); + } + + private static class OrderedIntervalIterator implements IntervalIterator { + + final List subIntervals; + final DocIdSetIterator approximation; + + int start; + int end; + int innerWidth; + int i; + + private OrderedIntervalIterator(List subIntervals) { + this.subIntervals = subIntervals; + this.approximation = ConjunctionDISI.intersectIntervals(subIntervals); + } + + @Override + public DocIdSetIterator approximation() { + return approximation; + } + + @Override + public int start() { + return start; + } + + @Override + public int end() { + return end; + } + + @Override + public int innerWidth() { + return innerWidth; + } + + @Override + public void advanceTo(int doc) throws IOException { + for (IntervalIterator it : subIntervals) { + it.advanceTo(doc); + } + subIntervals.get(0).nextInterval(); + i = 1; + start = end = innerWidth = Integer.MIN_VALUE; + } + + @Override + public int nextInterval() throws IOException { + start = end = NO_MORE_INTERVALS; + int b = Integer.MAX_VALUE; + while (true) { + while (true) { + if (subIntervals.get(i - 1).end() >= b) + return start; + if (i == subIntervals.size() || subIntervals.get(i).start() > subIntervals.get(i - 1).end()) + break; + do { + if (subIntervals.get(i).end() >= b || subIntervals.get(i).nextInterval() == NO_MORE_INTERVALS) + return start; + } + while 
(subIntervals.get(i).start() <= subIntervals.get(i - 1).end()); + i++; + } + start = subIntervals.get(0).start(); + end = subIntervals.get(subIntervals.size() - 1).end(); + b = subIntervals.get(subIntervals.size() - 1).start(); + innerWidth = b - subIntervals.get(0).end() - 1; + i = 1; + if (subIntervals.get(0).nextInterval() == NO_MORE_INTERVALS) + return start; + } + } + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java index beb73ad11159..f9b5bda6ce45 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java @@ -714,6 +714,11 @@ private boolean shouldCache(LeafReaderContext context) throws IOException { && leavesToCache.test(context); } + @Override + public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { + return in.intervals(context, field); + } + @Override public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { if (used.compareAndSet(false, true)) { diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java index 81624ccac9d8..f1da1fb50cf6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java @@ -178,4 +178,5 @@ public int advanceShallow(int target) throws IOException { * included and {@code upTo} included. 
*/ public abstract float getMaxScore(int upTo) throws IOException; + } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index f1f44154f554..da06023b7c30 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermStates; @@ -85,6 +86,20 @@ public String toString() { return "weight(" + TermQuery.this + ")"; } + @Override + public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { + if (term.field().equals(field) == false) { + return null; + } + assert termStates == null || termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);; + final TermsEnum termsEnum = getTermsEnum(context); + if (termsEnum == null) { + return null; + } + PostingsEnum pe = termsEnum.postings(null, PostingsEnum.POSITIONS); + return Intervals.termIterator(pe); + } + @Override public Scorer scorer(LeafReaderContext context) throws IOException { assert termStates == null || termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);; diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index d51626fda8c1..1d9d5e670ce2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -27,6 +27,7 @@ /** Expert: A Scorer for documents matching a Term. */ final class TermScorer extends Scorer { + private final PostingsEnum postingsEnum; private final ImpactsEnum impactsEnum; private final DocIdSetIterator iterator; diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 7853ccf2465b..b98a17f9e02b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -102,6 +102,10 @@ public final Query getQuery() { */ public abstract Scorer scorer(LeafReaderContext context) throws IOException; + public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { + return null; + } + /** * Optional method. * Get a {@link ScorerSupplier}, which allows to know the cost of the {@link Scorer} diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java new file mode 100644 index 000000000000..bc85bbc58cb7 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestIntervalQuery extends LuceneTestCase { + + private IndexSearcher searcher; + private IndexReader reader; + private Directory directory; + + public static final String field = "field"; + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); + for (int i = 0; i < docFields.length; i++) { + Document doc = new Document(); + doc.add(newTextField(field, docFields[i], Field.Store.YES)); + writer.addDocument(doc); + } + reader = writer.getReader(); + writer.close(); + searcher = newSearcher(reader); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + directory.close(); + super.tearDown(); + } + + private String[] docFields = { + "w1 w2 w3 w4 w5", + "w1 w3 w2 w3", + "w1 xx w2 yy w3", + "w1 w3 xx w2 yy w3", + "w2 w1", + "w2 w1 w3 w2" + }; + + private void checkHits(Query query, int[] results) throws IOException { + CheckHits.checkHits(random(), query, field, searcher, results); + } + + public void testOrderedNearQueryWidth0() throws IOException { + checkHits(IntervalQuery.orderedNearQuery(field, 0, new TermQuery(new Term(field, "w1")), + new TermQuery(new Term(field, "w2"))), + new int[]{ 0 }); + } + + public void testOrderedNearQueryWidth1() throws IOException { + 
checkHits(IntervalQuery.orderedNearQuery(field, 1, new TermQuery(new Term(field, "w1")), + new TermQuery(new Term(field, "w2"))), + new int[]{ 0, 1, 2, 5 }); + } + + public void testOrderedNearQueryWidth2() throws IOException { + checkHits(IntervalQuery.orderedNearQuery(field, 2, new TermQuery(new Term(field, "w1")), + new TermQuery(new Term(field, "w2"))), + new int[]{ 0, 1, 2, 3, 5 }); + } + + public void testNestedOrderedNearQuery() throws IOException { + // onear/1(w1, onear/2(w2, w3)) + Query q = IntervalQuery.orderedNearQuery(field, 1, + new TermQuery(new Term(field, "w1")), + IntervalQuery.orderedNearQuery(field, 2, + new TermQuery(new Term(field, "w2")), + new TermQuery(new Term(field, "w3"))) + ); + + checkHits(q, new int[]{ 0, 1, 2 }); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java new file mode 100644 index 000000000000..b38e27bba222 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +public class TestIntervals extends LuceneTestCase { + + private static String field1_docs[] = { + "Nothing of interest to anyone here", + "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. Some like it hot, some like it cold, some like it in the pot nine days old", + "Pease porridge cold, pease porridge hot, pease porridge in the pot nine days old. Some like it cold, some like it hot, some like it in the pot nine days old", + "Nor here, nowt hot going on in this one", + "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. 
Some like it hot, some like it cold", + "Porridge is great" + }; + + private static String field2_docs[] = { + "In Xanadu did Kubla Khan a stately pleasure dome decree", + "Where Alph the sacred river ran through caverns measureless to man", + "Down to a sunless sea", + "So thrice five miles of fertile ground", + "With walls and towers were girdled round", + "Which was nice" + }; + + private static Directory directory; + private static IndexSearcher searcher; + private static Analyzer analyzer = new StandardAnalyzer(); + + @BeforeClass + public static void setupIndex() throws IOException { + directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(analyzer)); + for (int i = 0; i < field1_docs.length; i++) { + Document doc = new Document(); + doc.add(new TextField("field1", field1_docs[i], Field.Store.NO)); + doc.add(new TextField("field2", field2_docs[i], Field.Store.NO)); + doc.add(new NumericDocValuesField("id", i)); + writer.addDocument(doc); + } + writer.close(); + searcher = new IndexSearcher(DirectoryReader.open(directory)); + } + + @AfterClass + public static void teardownIndex() throws IOException { + IOUtils.close(searcher.getIndexReader(), directory); + } + + public void testTermQueryIntervals() throws IOException { + int[][] expected = new int[][]{ + {}, + { 1, 4, 7 }, + { 1, 4, 7 }, + {}, + { 1, 4, 7 }, + { 0 } + }; + + Weight weight = searcher.createNormalizedWeight(new TermQuery(new Term("field1", "porridge")), ScoreMode.COMPLETE); + for (LeafReaderContext ctx : searcher.leafContexts) { + assertNull(weight.intervals(ctx, "field2")); + NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); + IntervalIterator intervals = weight.intervals(ctx, "field1"); + for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { + ids.advance(doc); + int id = (int) ids.longValue(); + intervals.advanceTo(doc); + int i = 0, pos; + while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) 
{ + assertEquals(expected[id][i], pos); + assertEquals(expected[id][i], intervals.start()); + assertEquals(expected[id][i], intervals.end()); + i++; + } + assertEquals(expected[id].length, i); + } + } + + } + + public void testOrderedNearIntervals() throws IOException { + + int[][] expected = new int[][]{ + {}, + { 0, 2, 6, 17 }, + { 3, 5, 6, 21 }, + {}, + { 0, 2, 6, 17 }, + { } + }; + + Weight peaseWeight = searcher.createNormalizedWeight(new TermQuery(new Term("field1", "pease")), ScoreMode.COMPLETE); + Weight hotWeight = searcher.createNormalizedWeight(new TermQuery(new Term("field1", "hot")), ScoreMode.COMPLETE); + for (LeafReaderContext ctx : searcher.leafContexts) { + NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); + IntervalIterator intervals = Intervals.orderedIntervalIterator( + Arrays.asList(peaseWeight.intervals(ctx, "field1"), hotWeight.intervals(ctx, "field1")) + ); + for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { + ids.advance(doc); + int id = (int) ids.longValue(); + intervals.advanceTo(doc); + int i = 0, pos; + while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { + assertEquals(expected[id][i], pos); + assertEquals(expected[id][i], intervals.start()); + assertEquals(expected[id][i + 1], intervals.end()); + i += 2; + } + assertEquals(expected[id].length, i); + } + } + + } + +} From 350043b0c2c725087e89d9b93d1c2245a6b09185 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 20 Feb 2018 14:25:51 +0000 Subject: [PATCH 39/83] WIP --- .../lucene/document/RangeFieldQuery.java | 6 +- .../SortedNumericDocValuesRangeQuery.java | 2 +- .../SortedSetDocValuesRangeQuery.java | 2 +- .../lucene/index/FrozenBufferedUpdates.java | 2 +- .../org/apache/lucene/index/PostingsEnum.java | 4 + .../java/org/apache/lucene/index/Sorter.java | 25 ----- .../search/BlockMaxConjunctionScorer.java | 4 + .../apache/lucene/search/BooleanWeight.java | 17 +-- .../lucene/search/CachingCollector.java | 5 + 
.../apache/lucene/search/ConjunctionDISI.java | 15 --- .../lucene/search/ConjunctionScorer.java | 13 +-- .../lucene/search/ConstantScoreQuery.java | 8 +- .../lucene/search/ConstantScoreScorer.java | 5 + .../lucene/search/ConstantScoreWeight.java | 3 +- .../org/apache/lucene/search/DisiWrapper.java | 17 +++ .../lucene/search/DisjunctionMaxQuery.java | 4 +- .../lucene/search/DisjunctionScorer.java | 5 + .../search/DocValuesFieldExistsQuery.java | 2 +- .../lucene/search/DocValuesRewriteMethod.java | 2 +- .../lucene/search/DoubleValuesSource.java | 3 +- .../lucene/search/ExactPhraseScorer.java | 5 + .../org/apache/lucene/search/FakeScorer.java | 5 + .../apache/lucene/search/FilterScorer.java | 5 + .../apache/lucene/search/FilterWeight.java | 9 +- .../lucene/search/IndexOrDocValuesQuery.java | 10 +- .../apache/lucene/search/IntervalFilter.java | 9 +- .../lucene/search/IntervalIterator.java | 10 +- .../apache/lucene/search/IntervalQuery.java | 21 ++-- .../apache/lucene/search/IntervalScorer.java | 15 ++- .../org/apache/lucene/search/Intervals.java | 105 ++++++++++++++---- .../apache/lucene/search/LRUQueryCache.java | 21 ++-- .../lucene/search/MatchAllDocsQuery.java | 2 +- .../lucene/search/MatchNoDocsQuery.java | 2 +- .../search/MinShouldMatchSumScorer.java | 5 + .../lucene/search/MultiPhraseQuery.java | 6 +- .../MultiTermQueryConstantScoreWrapper.java | 4 +- .../lucene/search/NormsFieldExistsQuery.java | 2 +- .../org/apache/lucene/search/PhraseQuery.java | 6 +- .../apache/lucene/search/PointInSetQuery.java | 2 +- .../apache/lucene/search/PointRangeQuery.java | 6 +- .../apache/lucene/search/QueryRescorer.java | 3 +- .../apache/lucene/search/ReqExclScorer.java | 5 + .../apache/lucene/search/ReqOptSumScorer.java | 11 ++ .../java/org/apache/lucene/search/Scorer.java | 2 + .../lucene/search/SloppyPhraseScorer.java | 5 + .../apache/lucene/search/SynonymQuery.java | 7 +- .../apache/lucene/search/TermInSetQuery.java | 4 +- .../org/apache/lucene/search/TermQuery.java | 20 +--- 
.../org/apache/lucene/search/TermScorer.java | 17 ++- .../org/apache/lucene/search/WANDScorer.java | 5 + .../java/org/apache/lucene/search/Weight.java | 17 ++- .../lucene/search/spans/SpanScorer.java | 6 + .../lucene/search/spans/SpanWeight.java | 4 +- .../lucene/search/TestIntervalQuery.java | 12 +- .../apache/lucene/search/TestIntervals.java | 84 +++++++------- .../apache/lucene/search/AssertingScorer.java | 5 + .../search/BulkScorerWrapperScorer.java | 5 + .../org/apache/lucene/search/QueryUtils.java | 15 +-- .../lucene/search/ScorerIndexSearcher.java | 3 +- .../search/spans/AssertingSpanWeight.java | 4 +- 60 files changed, 366 insertions(+), 262 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java index a24b7cdfae58..d507da963f4c 100644 --- a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java @@ -290,7 +290,7 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { LeafReader reader = context.reader(); PointValues values = reader.getPointValues(field); if (values == null) { @@ -350,8 +350,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context); + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context, postings); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java index 
246b50f3dab6..0c0efdf56660 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java @@ -102,7 +102,7 @@ public boolean isCacheable(LeafReaderContext ctx) { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { SortedNumericDocValues values = getValues(context.reader(), field); if (values == null) { return null; diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java index de7c11b1cc9a..cd6cfadac7f6 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java @@ -107,7 +107,7 @@ public Query rewrite(IndexReader reader) throws IOException { public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { SortedSetDocValues values = getValues(context.reader(), field); if (values == null) { return null; diff --git a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java index 1f8974a510d0..8bfb19a97325 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java @@ -687,7 +687,7 @@ private long applyQueryDeletes(BufferedUpdatesStream.SegmentState[] segStates) t final IndexSearcher searcher = new IndexSearcher(readerContext.reader()); 
searcher.setQueryCache(null); final Weight weight = searcher.createNormalizedWeight(query, ScoreMode.COMPLETE_NO_SCORES); - final Scorer scorer = weight.scorer(readerContext); + final Scorer scorer = weight.scorer(readerContext, PostingsEnum.NONE); if (scorer != null) { final DocIdSetIterator it = scorer.iterator(); diff --git a/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java index fdd32a9f2fe0..bb93268ff92b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java @@ -63,6 +63,10 @@ public static boolean featureRequested(int flags, short feature) { return (flags & feature) == feature; } + public static short highest(short a, short b) { + return (short) Math.max(a, b); + } + private AttributeSource atts = null; /** Sole constructor. (For invocation by subclass diff --git a/lucene/core/src/java/org/apache/lucene/index/Sorter.java b/lucene/core/src/java/org/apache/lucene/index/Sorter.java index c47f9a118abb..a081ea7aaf3b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/Sorter.java +++ b/lucene/core/src/java/org/apache/lucene/index/Sorter.java @@ -445,30 +445,5 @@ public String getID() { public String toString() { return getID(); } - - static final Scorer FAKESCORER = new Scorer(null) { - - float score; - int doc = -1; - - @Override - public int docID() { - return doc; - } - - public DocIdSetIterator iterator() { - throw new UnsupportedOperationException(); - } - - @Override - public float score() throws IOException { - return score; - } - - @Override - public float getMaxScore(int upTo) throws IOException { - return Float.POSITIVE_INFINITY; - } - }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java index 070b6c40f025..2c625643ef15 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java @@ -240,4 +240,8 @@ public Collection getChildren() { return children; } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java index fffdd09093f1..ea1ffed6594a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java @@ -27,6 +27,7 @@ import java.util.Set; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.similarities.Similarity; @@ -112,7 +113,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio // contributions to the score to floats), so in order to make sure that // explanations have the same value as the score, we pull a scorer and // use it to compute the score. - Scorer scorer = scorer(context); + Scorer scorer = scorer(context, PostingsEnum.NONE); int advanced = scorer.iterator().advance(doc); assert advanced == doc; return Explanation.match(scorer.score(), "sum of:", subs); @@ -210,7 +211,7 @@ private BulkScorer requiredBulkScorer(LeafReaderContext context) throws IOExcept /** Try to build a boolean scorer for this weight. Returns null if {@link BooleanScorer} * cannot be used. 
*/ - BulkScorer booleanScorer(LeafReaderContext context) throws IOException { + BulkScorer booleanScorer(LeafReaderContext context, short postings) throws IOException { final int numOptionalClauses = query.getClauses(Occur.SHOULD).size(); final int numRequiredClauses = query.getClauses(Occur.MUST).size() + query.getClauses(Occur.FILTER).size(); @@ -262,7 +263,7 @@ BulkScorer booleanScorer(LeafReaderContext context) throws IOException { for (Weight w : weights) { BooleanClause c = cIter.next(); if (c.isProhibited()) { - Scorer scorer = w.scorer(context); + Scorer scorer = w.scorer(context, postings); if (scorer != null) { prohibited.add(scorer); } @@ -290,7 +291,7 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { // so that we can dynamically prune non-competitive hits. return super.bulkScorer(context); } - final BulkScorer bulkScorer = booleanScorer(context); + final BulkScorer bulkScorer = booleanScorer(context, PostingsEnum.NONE); if (bulkScorer != null) { // bulk scoring is applicable, use it return bulkScorer; @@ -301,8 +302,8 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context); + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context, postings); if (scorerSupplier == null) { return null; } @@ -325,7 +326,7 @@ public boolean isCacheable(LeafReaderContext ctx) { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { int minShouldMatch = query.getMinimumNumberShouldMatch(); final Map> scorers = new EnumMap<>(Occur.class); @@ -336,7 +337,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti Iterator cIter = 
query.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); - ScorerSupplier subScorer = w.scorerSupplier(context); + ScorerSupplier subScorer = w.scorerSupplier(context, postings); if (subScorer == null) { if (c.isRequired()) { return null; diff --git a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java index 3bed88dd9980..ae705455ecfa 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java @@ -64,6 +64,11 @@ public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); } + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } + @Override public final float score() { return score; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java index a5eabcc0266a..780e854033a8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java @@ -71,21 +71,6 @@ public static DocIdSetIterator intersectIterators(List iterato return createConjunction(allIterators, twoPhaseIterators); } - public static DocIdSetIterator intersectIntervals(List iterators) { - if (iterators.size() < 2) { - throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators"); - } - final List allIterators = new ArrayList<>(); - final List twoPhaseIterators = new ArrayList<>(); - for (IntervalIterator iterator : iterators) { - if (iterator == null) - return DocIdSetIterator.empty(); - addIterator(iterator.approximation(), allIterators, twoPhaseIterators); - } - - return createConjunction(allIterators, twoPhaseIterators); - } - /** Create a conjunction over the provided {@link Spans}. 
Note that the * returned {@link DocIdSetIterator} might leverage two-phase iteration in * which case it is possible to retrieve the {@link TwoPhaseIterator} using diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java index 7a1b9563721b..f820cd0aaa8e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java @@ -101,14 +101,9 @@ public Collection getChildren() { return children; } - static final class DocsAndFreqs { - final long cost; - final DocIdSetIterator iterator; - int doc = -1; - - DocsAndFreqs(DocIdSetIterator iterator) { - this.iterator = iterator; - this.cost = iterator.cost(); - } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java index 464cde6a45f9..abdb85953dd8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -125,8 +125,8 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { - ScorerSupplier innerScorerSupplier = innerWeight.scorerSupplier(context); + public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { + ScorerSupplier innerScorerSupplier = innerWeight.scorerSupplier(context, postings); if (innerScorerSupplier == null) { return null; } @@ -159,8 +159,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context); + public Scorer scorer(LeafReaderContext context, short postings) 
throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context, postings); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java index 45a6bdbad041..56adcf390553 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java @@ -63,6 +63,11 @@ public DocIdSetIterator iterator() { return disi; } + @Override + public IntervalIterator intervals(String field) { + return null; + } + @Override public TwoPhaseIterator twoPhaseIterator() { return twoPhaseIterator; diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java index 671ec7103782..57316b9f7e4a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java @@ -21,6 +21,7 @@ import java.util.Set; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; /** @@ -53,7 +54,7 @@ protected final float score() { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - final Scorer s = scorer(context); + final Scorer s = scorer(context, PostingsEnum.NONE); final boolean exists; if (s == null) { exists = false; diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index fac9418010f4..b8891b7c0ce2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -81,5 +81,22 @@ public DisiWrapper(Spans spans) { this.lastApproxNonMatchDoc = -2; this.lastApproxMatchDoc = -2; } + + public 
DisiWrapper(DocIdSetIterator disi) { + this.scorer = null; + this.spans = null; + this.iterator = disi; + this.cost = iterator.cost(); + this.doc = -1; + this.twoPhaseView = TwoPhaseIterator.unwrap(disi); + if (twoPhaseView != null) { + approximation = twoPhaseView.approximation(); + matchCost = twoPhaseView.matchCost(); + } + else { + approximation = iterator; + matchCost = 0f; + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index 1e67cb150465..552dff1283a5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -120,11 +120,11 @@ public void extractTerms(Set terms) { /** Create the scorer used to score our associated DisjunctionMaxQuery */ @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { List scorers = new ArrayList<>(); for (Weight w : weights) { // we will advance() subscorers - Scorer subScorer = w.scorer(context); + Scorer subScorer = w.scorer(context, postings); if (subScorer != null) { scorers.add(subScorer); } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index 147b993f2d9f..f69fd936f453 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -180,6 +180,11 @@ public final float score() throws IOException { return score(getSubMatches()); } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit + } + /** Compute the score for the given linked list of scorers. 
*/ protected abstract float score(DisiWrapper topList) throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java index 009f11cf116f..bbf9c3ced9a3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java @@ -65,7 +65,7 @@ public String toString(String field) { public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { FieldInfos fieldInfos = context.reader().getFieldInfos(); FieldInfo fieldInfo = fieldInfos.fieldInfo(field); if (fieldInfo == null) { diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java index 5d591983fab0..a141cbe64a72 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java @@ -75,7 +75,7 @@ public final int hashCode() { public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field); TermsEnum termsEnum = query.getTermsEnum(new Terms() { diff --git a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java index 
3938d3f9ba22..d951fc45db83 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java @@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PostingsEnum; /** * Base class for producing {@link DoubleValues} @@ -577,7 +578,7 @@ private WeightDoubleValuesSource(Weight weight) { @Override public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { - Scorer scorer = weight.scorer(ctx); + Scorer scorer = weight.scorer(ctx, PostingsEnum.NONE); if (scorer == null) return DoubleValues.EMPTY; DocIdSetIterator it = scorer.iterator(); diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index d7c4f9f6e2b8..d68f8557dd10 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -125,6 +125,11 @@ public float getMaxScore(int upTo) throws IOException { return docScorer.maxScore(); } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit + } + /** Advance the given pos enum to the first doc on or after {@code target}. * Return {@code false} if the enum was exhausted before reaching * {@code target} and {@code true} otherwise. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java index c8b34381b2b2..1fcac3a05107 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java @@ -45,6 +45,11 @@ public float getMaxScore(int upTo) throws IOException { return Float.POSITIVE_INFINITY; } + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } + @Override public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java index 7bcb1ce4a64b..6de7e107300e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java @@ -67,6 +67,11 @@ public final int docID() { return in.docID(); } + @Override + public IntervalIterator intervals(String field) { + return in.intervals(field); + } + @Override public final DocIdSetIterator iterator() { return in.iterator(); diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java index bdb9108bcc42..3ac351029097 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java @@ -60,11 +60,6 @@ public boolean isCacheable(LeafReaderContext ctx) { return in.isCacheable(ctx); } - @Override - public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { - return in.intervals(context, field); - } - @Override public void extractTerms(Set terms) { in.extractTerms(terms); @@ -76,8 +71,8 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public Scorer scorer(LeafReaderContext 
context) throws IOException { - return in.scorer(context); + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + return in.scorer(context, postings); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java index f89924d16054..50b04523baa3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java @@ -133,9 +133,9 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { - final ScorerSupplier indexScorerSupplier = indexWeight.scorerSupplier(context); - final ScorerSupplier dvScorerSupplier = dvWeight.scorerSupplier(context); + public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { + final ScorerSupplier indexScorerSupplier = indexWeight.scorerSupplier(context, postings); + final ScorerSupplier dvScorerSupplier = dvWeight.scorerSupplier(context, postings); if (indexScorerSupplier == null || dvScorerSupplier == null) { return null; } @@ -162,8 +162,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context); + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context, postings); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index 4b0812120db5..852ac8070160 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -55,12 +55,7 @@ 
public int innerWidth() { } @Override - public DocIdSetIterator approximation() { - return in.approximation(); - } - - @Override - public void advanceTo(int doc) throws IOException { - in.advanceTo(doc); + public void reset() throws IOException { + in.reset(); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index af4e0520a86e..a85c24011e1f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -23,15 +23,13 @@ public interface IntervalIterator { - DocIdSetIterator approximation(); - int start(); int end(); int innerWidth(); - void advanceTo(int doc) throws IOException; + void reset() throws IOException; int nextInterval() throws IOException; @@ -40,10 +38,6 @@ default float score() { } IntervalIterator EMPTY = new IntervalIterator() { - @Override - public DocIdSetIterator approximation() { - return DocIdSetIterator.empty(); - } @Override public int start() { @@ -61,7 +55,7 @@ public int innerWidth() { } @Override - public void advanceTo(int doc) throws IOException { + public void reset() { } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index 068912e93855..16de6fc9c1a8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -26,6 +26,7 @@ import java.util.stream.Collectors; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.Similarity; @@ -111,7 +112,7 @@ public void extractTerms(Set terms) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context); + Scorer scorer = 
scorer(context, PostingsEnum.POSITIONS); if (scorer != null && scorer.iterator().advance(doc) == doc) { return Explanation.match(scorer.score(), "Intervals match"); // nocommit improve this } @@ -119,21 +120,17 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { List subIntervals = new ArrayList<>(); + List disis = new ArrayList<>(); for (Weight w : subWeights) { - subIntervals.add(w.intervals(context, field)); + Scorer scorer = w.scorer(context, PostingsEnum.POSITIONS); + disis.add(scorer.iterator()); + subIntervals.add(scorer.intervals(field)); } - return IntervalQuery.this.iteratorFunction.apply(subIntervals); - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - IntervalIterator intervals = intervals(context, field); - if (intervals == IntervalIterator.EMPTY) - return null; + IntervalIterator intervals = IntervalQuery.this.iteratorFunction.apply(subIntervals); LeafSimScorer leafScorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.POSITIVE_INFINITY); // nocommit - return new IntervalScorer(this, intervals, leafScorer); + return new IntervalScorer(this, field, ConjunctionDISI.intersectIterators(disis), intervals, leafScorer); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index 24c7585d44cd..b25fa3e6f31e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -22,14 +22,16 @@ class IntervalScorer extends Scorer { private final IntervalIterator intervals; + private final String field; private final DocIdSetIterator approximation; private final LeafSimScorer 
simScorer; - protected IntervalScorer(Weight weight, IntervalIterator intervals, LeafSimScorer simScorer) { + protected IntervalScorer(Weight weight, String field, DocIdSetIterator approximation, IntervalIterator intervals, LeafSimScorer simScorer) { super(weight); this.intervals = intervals; - this.approximation = intervals.approximation(); + this.approximation = approximation; this.simScorer = simScorer; + this.field = field; } @Override @@ -46,6 +48,13 @@ public float score() throws IOException { return simScorer.score(docID(), freq); } + @Override + public IntervalIterator intervals(String field) { + if (this.field.equals(field)) + return intervals; + return null; + } + @Override public DocIdSetIterator iterator() { return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); @@ -56,7 +65,7 @@ public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { - intervals.advanceTo(docID()); + intervals.reset(); return intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; } diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index db67f0909673..d8cb9a98e71f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -21,6 +21,7 @@ import java.util.List; import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.util.PriorityQueue; public final class Intervals { @@ -61,11 +62,6 @@ public TermIntervalIterator(PostingsEnum pe) { int upTo = -1; int pos = -1; - @Override - public DocIdSetIterator approximation() { - return pe; - } - @Override public int start() { return pos; @@ -82,14 +78,8 @@ public int innerWidth() { } @Override - public void advanceTo(int doc) throws IOException { - pos = -1; - if (pe.docID() == doc || (pe.docID() < doc && pe.advance(doc) == doc)) { - upTo = pe.freq(); - } - else { - 
upTo = -1; - } + public void reset() throws IOException { + upTo = pe.freq(); } @Override @@ -118,7 +108,6 @@ public static IntervalIterator orderedIntervalIterator(List su private static class OrderedIntervalIterator implements IntervalIterator { final List subIntervals; - final DocIdSetIterator approximation; int start; int end; @@ -127,12 +116,6 @@ private static class OrderedIntervalIterator implements IntervalIterator { private OrderedIntervalIterator(List subIntervals) { this.subIntervals = subIntervals; - this.approximation = ConjunctionDISI.intersectIntervals(subIntervals); - } - - @Override - public DocIdSetIterator approximation() { - return approximation; } @Override @@ -151,9 +134,9 @@ public int innerWidth() { } @Override - public void advanceTo(int doc) throws IOException { + public void reset() throws IOException { for (IntervalIterator it : subIntervals) { - it.advanceTo(doc); + it.reset(); } subIntervals.get(0).nextInterval(); i = 1; @@ -188,4 +171,82 @@ public int nextInterval() throws IOException { } } + public static IntervalIterator or(List subIterators) { + return new DisjunctionIntervalIterator(subIterators); + } + + private static class DisjunctionIntervalIterator implements IntervalIterator { + + private final PriorityQueue queue; + private final IntervalIterator[] subIterators; + + IntervalIterator current; + + DisjunctionIntervalIterator(List subIterators) { + this.queue = new PriorityQueue(subIterators.size()) { + @Override + protected boolean lessThan(IntervalIterator a, IntervalIterator b) { + return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); + } + }; + this.subIterators = new IntervalIterator[subIterators.size()]; + + for (int i = 0; i < subIterators.size(); i++) { + this.subIterators[i] = subIterators.get(i); + } + } + + @Override + public int start() { + return current.start(); + } + + @Override + public int end() { + return current.end(); + } + + @Override + public int innerWidth() { + return 
current.innerWidth(); + } + + @Override + public void reset() throws IOException { + queue.clear(); + for (int i = 0; i < subIterators.length; i++) { + subIterators[i].reset(); + subIterators[i].nextInterval(); + queue.add(subIterators[i]); + } + current = null; + } + + @Override + public int nextInterval() throws IOException { + if (current == null) { + current = queue.top(); + return current.start(); + } + int start = current.start(), end = current.end(); + while (queue.size() > 0 && contains(queue.top(), start, end)) { + IntervalIterator it = queue.pop(); + if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { + queue.add(it); + } + } + if (queue.size() == 0) { + current = IntervalIterator.EMPTY; + return NO_MORE_INTERVALS; + } + current = queue.top(); + return current.start(); + } + + private boolean contains(IntervalIterator it, int start, int end) { + return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); + } + + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java index f9b5bda6ce45..5b8cf917ba1d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java @@ -715,36 +715,31 @@ private boolean shouldCache(LeafReaderContext context) throws IOException { } @Override - public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { - return in.intervals(context, field); - } - - @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { if (used.compareAndSet(false, true)) { policy.onUse(getQuery()); } if (in.isCacheable(context) == false) { // this segment is not suitable for caching - return in.scorerSupplier(context); + return in.scorerSupplier(context, postings); } 
// Short-circuit: Check whether this segment is eligible for caching // before we take a lock because of #get if (shouldCache(context) == false) { - return in.scorerSupplier(context); + return in.scorerSupplier(context, postings); } // If the lock is already busy, prefer using the uncached version than waiting if (lock.tryLock() == false) { - return in.scorerSupplier(context); + return in.scorerSupplier(context, postings); } final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper(); if (cacheHelper == null) { // this reader has no cache helper - return in.scorerSupplier(context); + return in.scorerSupplier(context, postings); } DocIdSet docIdSet; try { @@ -754,7 +749,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti } if (docIdSet == null) { - ScorerSupplier inSupplier = in.scorerSupplier(context); + ScorerSupplier inSupplier = in.scorerSupplier(context, postings); if (inSupplier == null) { putIfAbsent(in.getQuery(), context, DocIdSet.EMPTY, cacheHelper); return null; @@ -814,8 +809,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context); + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context, postings); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java index 89b299734144..e878924a9f99 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java @@ -36,7 +36,7 @@ public String toString() { return "weight(" + MatchAllDocsQuery.this + ")"; } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, 
short postings) throws IOException { return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc())); } diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java index 525a18395434..d539c3d21453 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java @@ -54,7 +54,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java index 6ffbe340144e..98df563a61ba 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java @@ -126,6 +126,11 @@ public final Collection getChildren() throws IOException { return matchingChildren; } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit + } + @Override public DocIdSetIterator iterator() { return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 65d6631e9a7c..23eb49621252 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -236,7 +236,7 @@ public void extractTerms(Set terms) { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short pf) throws IOException 
{ assert termArrays.length != 0; final LeafReader reader = context.reader(); @@ -265,7 +265,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { TermState termState = termStates.get(term).get(context); if (termState != null) { termsEnum.seekExact(term.bytes(), termState); - postings.add(termsEnum.postings(null, PostingsEnum.POSITIONS)); + postings.add(termsEnum.postings(null, PostingsEnum.highest(pf, PostingsEnum.POSITIONS))); totalMatchCost += PhraseQuery.termPositionsCost(termsEnum); } } @@ -307,7 +307,7 @@ public boolean isCacheable(LeafReaderContext ctx) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context); + Scorer scorer = scorer(context, PostingsEnum.POSITIONS); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java index 3a46b96411cf..cfad13791b1f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java @@ -203,10 +203,10 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { final WeightOrDocIdSet weightOrBitSet = rewrite(context); if (weightOrBitSet.weight != null) { - return weightOrBitSet.weight.scorer(context); + return weightOrBitSet.weight.scorer(context, postings); } else { return scorer(weightOrBitSet.set); } diff --git a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java index 74218b40b0c3..9e639247aec8 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java @@ -65,7 +65,7 @@ public String toString(String field) { public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { FieldInfos fieldInfos = context.reader().getFieldInfos(); FieldInfo fieldInfo = fieldInfos.fieldInfo(field); if (fieldInfo == null || fieldInfo.hasNorms() == false) { diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index ff1538820d61..b58f1ed868aa 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -396,7 +396,7 @@ public void extractTerms(Set queryTerms) { public String toString() { return "weight(" + PhraseQuery.this + ")"; } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { assert terms.length > 0; final LeafReader reader = context.reader(); PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.length]; @@ -422,7 +422,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { return null; } te.seekExact(t.bytes(), state); - PostingsEnum postingsEnum = te.postings(null, PostingsEnum.POSITIONS); + PostingsEnum postingsEnum = te.postings(null, PostingsEnum.highest(postings, PostingsEnum.POSITIONS)); postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t); totalMatchCost += termPositionsCost(te); } @@ -455,7 +455,7 @@ private boolean termNotInReader(LeafReader reader, Term term) throws IOException @Override 
public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context); + Scorer scorer = scorer(context, PostingsEnum.POSITIONS); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java index 689d64a50d74..325e06060336 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java @@ -114,7 +114,7 @@ public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, fl return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { LeafReader reader = context.reader(); PointValues values = reader.getPointValues(field); diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 7e48383b4720..897eb5d989ea 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -224,7 +224,7 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { LeafReader reader = context.reader(); PointValues values = reader.getPointValues(field); @@ -314,8 +314,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context); + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + 
ScorerSupplier scorerSupplier = scorerSupplier(context, postings); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java index 6b19f295a7d8..054a23dad595 100644 --- a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java @@ -23,6 +23,7 @@ import java.util.List; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; /** A {@link Rescorer} that uses a provided Query to assign * scores to the first-pass hits. @@ -82,7 +83,7 @@ public int compare(ScoreDoc a, ScoreDoc b) { if (readerContext != null) { // We advanced to another segment: docBase = readerContext.docBase; - scorer = weight.scorer(readerContext); + scorer = weight.scorer(readerContext, PostingsEnum.FREQS); } if (scorer != null) { diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java index 987293eb0476..aa9108512190 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java @@ -61,6 +61,11 @@ private static boolean matchesOrNull(TwoPhaseIterator it) throws IOException { return it == null || it.matches(); } + @Override + public IntervalIterator intervals(String field) { + return reqScorer.intervals(field); + } + @Override public DocIdSetIterator iterator() { return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java index 6d93a54560d4..d8be205f37de 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java @@ -184,6 +184,17 @@ public 
DocIdSetIterator iterator() { } } + @Override + public IntervalIterator intervals(String field) { + IntervalIterator reqIntervals = reqScorer.intervals(field); + IntervalIterator optIntervals = optScorer.intervals(field); + if (optIntervals == null) + return reqIntervals; + if (reqIntervals == null) + return optIntervals; + return Intervals.or(Arrays.asList(reqIntervals, optIntervals)); + } + @Override public int docID() { return reqScorer.docID(); diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java index f1da1fb50cf6..3c05fb194281 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java @@ -125,6 +125,8 @@ public ChildScorer(Scorer child, String relationship) { */ public abstract DocIdSetIterator iterator(); + public abstract IntervalIterator intervals(String field); + /** * Optional method: Return a {@link TwoPhaseIterator} view of this * {@link Scorer}. A return value of {@code null} indicates that diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index 7587b37889b7..20a375955410 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -563,6 +563,11 @@ public float getMaxScore(int upTo) throws IOException { @Override public String toString() { return "scorer(" + weight + ")"; } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit. 
this will be fun + } + @Override public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(conjunction) { diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java index 2a7c450805d9..9fe7dbbbecf5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java @@ -29,6 +29,7 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; @@ -161,7 +162,7 @@ public void extractTerms(Set terms) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context); + Scorer scorer = scorer(context, PostingsEnum.FREQS); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { @@ -187,7 +188,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { IndexOptions indexOptions = IndexOptions.NONE; if (terms.length > 0) { FieldInfo info = context.reader() @@ -208,7 +209,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { long termMaxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); totalMaxFreq += termMaxFreq; LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, termMaxFreq); - subScorers.add(new TermScorer(this, termsEnum, ScoreMode.COMPLETE, simScorer)); + subScorers.add(new TermScorer(this, terms[i].field(), termsEnum, ScoreMode.COMPLETE, postings, simScorer)); } } if 
(subScorers.isEmpty()) { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java index a8bf5b0679c1..941e39227ca5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java @@ -309,12 +309,12 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer scorer(LeafReaderContext context, short postings) throws IOException { final WeightOrDocIdSet weightOrBitSet = rewrite(context); if (weightOrBitSet == null) { return null; } else if (weightOrBitSet.weight != null) { - return weightOrBitSet.weight.scorer(context); + return weightOrBitSet.weight.scorer(context, postings); } else { return scorer(weightOrBitSet.set); } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index da06023b7c30..d6e0386a2540 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -87,21 +87,7 @@ public String toString() { } @Override - public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { - if (term.field().equals(field) == false) { - return null; - } - assert termStates == null || termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);; - final TermsEnum termsEnum = getTermsEnum(context); - if (termsEnum == null) { - return null; - } - PostingsEnum pe = termsEnum.postings(null, PostingsEnum.POSITIONS); - return Intervals.termIterator(pe); - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { + public Scorer 
scorer(LeafReaderContext context, short postings) throws IOException { assert termStates == null || termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);; final TermsEnum termsEnum = getTermsEnum(context); if (termsEnum == null) { @@ -113,7 +99,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { .getIndexOptions(); float maxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), maxFreq); - return new TermScorer(this, termsEnum, scoreMode, scorer); + return new TermScorer(this, getTerm().field(), termsEnum, scoreMode, postings, scorer); } private long getMaxFreq(IndexOptions indexOptions, long ttf, long df) { @@ -159,7 +145,7 @@ private boolean termNotInReader(LeafReader reader, Term term) throws IOException @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - TermScorer scorer = (TermScorer) scorer(context); + TermScorer scorer = (TermScorer) scorer(context, PostingsEnum.FREQS); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 1d9d5e670ce2..7162d481444e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -33,6 +33,7 @@ final class TermScorer extends Scorer { private final DocIdSetIterator iterator; private final LeafSimScorer docScorer; private float minCompetitiveScore; + private final String field; /** * Construct a TermScorer. @@ -44,11 +45,12 @@ final class TermScorer extends Scorer { * @param docScorer * A {@link LeafSimScorer} for the appropriate field. 
*/ - TermScorer(Weight weight, TermsEnum te, ScoreMode scoreMode, LeafSimScorer docScorer) throws IOException { + TermScorer(Weight weight, String field, TermsEnum te, ScoreMode scoreMode, short postings, LeafSimScorer docScorer) throws IOException { super(weight); this.docScorer = docScorer; + this.field = field; if (scoreMode == ScoreMode.TOP_SCORES) { - impactsEnum = te.impacts(docScorer.getSimScorer(), PostingsEnum.FREQS); + impactsEnum = te.impacts(docScorer.getSimScorer(), PostingsEnum.highest(postings, PostingsEnum.FREQS)); postingsEnum = impactsEnum; iterator = new DocIdSetIterator() { @@ -105,7 +107,8 @@ public long cost() { } }; } else { - postingsEnum = te.postings(null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE); + short pf = PostingsEnum.highest(scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE, postings); + postingsEnum = te.postings(null, pf); impactsEnum = new SlowImpactsEnum(postingsEnum, docScorer.getSimScorer().score(Float.MAX_VALUE, 1)); iterator = postingsEnum; } @@ -125,6 +128,14 @@ public DocIdSetIterator iterator() { return iterator; } + @Override + public IntervalIterator intervals(String field) { + if (this.field.equals(field)) { + return Intervals.termIterator(postingsEnum); + } + return null; + } + @Override public float score() throws IOException { assert docID() != DocIdSetIterator.NO_MORE_DOCS; diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java index f7a88f15927e..f39380b5cd88 100644 --- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java @@ -440,6 +440,11 @@ private void advanceAllTail() throws IOException { assert ensureConsistent(); } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit + } + @Override public float score() throws IOException { // we need to know about all matches diff --git 
a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index b98a17f9e02b..98788582ae60 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -23,6 +23,7 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.util.Bits; @@ -36,7 +37,7 @@ * {@link org.apache.lucene.index.LeafReader} dependent state should reside in the {@link Scorer}. *

    * Since {@link Weight} creates {@link Scorer} instances for a given - * {@link org.apache.lucene.index.LeafReaderContext} ({@link #scorer(org.apache.lucene.index.LeafReaderContext)}) + * {@link org.apache.lucene.index.LeafReaderContext} ({@link #scorer(org.apache.lucene.index.LeafReaderContext,short)}) * callers must maintain the relationship between the searcher's top-level * {@link IndexReaderContext} and the context used to create a {@link Scorer}. *

    @@ -45,7 +46,7 @@ *

  • A Weight is constructed by a top-level query, given a * IndexSearcher ({@link Query#createWeight(IndexSearcher, ScoreMode, float)}). *
  • A Scorer is constructed by - * {@link #scorer(org.apache.lucene.index.LeafReaderContext)}. + * {@link #scorer(org.apache.lucene.index.LeafReaderContext,short)}. * * * @since 2.9 @@ -100,11 +101,7 @@ public final Query getQuery() { * @return a {@link Scorer} which scores documents in/out-of order. * @throws IOException if there is a low-level I/O error */ - public abstract Scorer scorer(LeafReaderContext context) throws IOException; - - public IntervalIterator intervals(LeafReaderContext context, String field) throws IOException { - return null; - } + public abstract Scorer scorer(LeafReaderContext context, short postings) throws IOException; /** * Optional method. @@ -113,8 +110,8 @@ public IntervalIterator intervals(LeafReaderContext context, String field) throw * builds a {@link ScorerSupplier} wrapper around it. * @see #scorer */ - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { - final Scorer scorer = scorer(context); + public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { + final Scorer scorer = scorer(context, postings); if (scorer == null) { return null; } @@ -148,7 +145,7 @@ public long cost() { */ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { - Scorer scorer = scorer(context); + Scorer scorer = scorer(context, PostingsEnum.NONE); if (scorer == null) { // No docs match return null; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java index 666f163742a3..5fe3fd8dbbf8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -21,6 +21,7 @@ import java.util.Objects; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IntervalIterator; import org.apache.lucene.search.LeafSimScorer; import 
org.apache.lucene.search.Scorer; import org.apache.lucene.search.TwoPhaseIterator; @@ -57,6 +58,11 @@ public int docID() { return spans.docID(); } + @Override + public IntervalIterator intervals(String field) { + return null; // nocommit + } + @Override public DocIdSetIterator iterator() { return spans; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index 25b58fdc39a0..d24887f3eb8c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -124,7 +124,7 @@ private Similarity.SimScorer buildSimWeight(SpanQuery query, IndexSearcher searc public abstract Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) throws IOException; @Override - public SpanScorer scorer(LeafReaderContext context) throws IOException { + public SpanScorer scorer(LeafReaderContext context, short postings) throws IOException { final Spans spans = getSpans(context, Postings.POSITIONS); if (spans == null) { return null; @@ -145,7 +145,7 @@ public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - SpanScorer scorer = scorer(context); + SpanScorer scorer = scorer(context, PostingsEnum.POSITIONS); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index bc85bbc58cb7..77029ffffc4b 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -40,7 +40,7 @@ public class TestIntervalQuery extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - 
RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); + RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); doc.add(newTextField(field, docFields[i], Field.Store.YES)); @@ -74,19 +74,19 @@ private void checkHits(Query query, int[] results) throws IOException { public void testOrderedNearQueryWidth0() throws IOException { checkHits(IntervalQuery.orderedNearQuery(field, 0, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), - new int[]{ 0 }); + new int[]{0}); } public void testOrderedNearQueryWidth1() throws IOException { checkHits(IntervalQuery.orderedNearQuery(field, 1, new TermQuery(new Term(field, "w1")), - new TermQuery(new Term(field, "w2"))), - new int[]{ 0, 1, 2, 5 }); + new TermQuery(new Term(field, "w2"))), + new int[]{0, 1, 2, 5}); } public void testOrderedNearQueryWidth2() throws IOException { checkHits(IntervalQuery.orderedNearQuery(field, 2, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), - new int[]{ 0, 1, 2, 3, 5 }); + new int[]{0, 1, 2, 3, 5}); } public void testNestedOrderedNearQuery() throws IOException { @@ -98,7 +98,7 @@ public void testNestedOrderedNearQuery() throws IOException { new TermQuery(new Term(field, "w3"))) ); - checkHits(q, new int[]{ 0, 1, 2 }); + checkHits(q, new int[]{0, 1, 2}); } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index b38e27bba222..fb3369de885a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -30,6 +30,7 @@ import org.apache.lucene.index.DocValues; import 
org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; @@ -82,71 +83,68 @@ public static void teardownIndex() throws IOException { IOUtils.close(searcher.getIndexReader(), directory); } - public void testTermQueryIntervals() throws IOException { - int[][] expected = new int[][]{ - {}, - { 1, 4, 7 }, - { 1, 4, 7 }, - {}, - { 1, 4, 7 }, - { 0 } - }; - - Weight weight = searcher.createNormalizedWeight(new TermQuery(new Term("field1", "porridge")), ScoreMode.COMPLETE); + private void checkIntervals(Query query, String field, int[][] expected) throws IOException { + Weight weight = searcher.createNormalizedWeight(query, ScoreMode.COMPLETE); for (LeafReaderContext ctx : searcher.leafContexts) { - assertNull(weight.intervals(ctx, "field2")); + Scorer scorer = weight.scorer(ctx, PostingsEnum.POSITIONS); + assertNull(scorer.intervals(field + "1")); NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); - IntervalIterator intervals = weight.intervals(ctx, "field1"); - for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { + IntervalIterator intervals = scorer.intervals("field1"); + DocIdSetIterator it = scorer.iterator(); + int matchedDocs = 0; + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { + matchedDocs++; ids.advance(doc); int id = (int) ids.longValue(); - intervals.advanceTo(doc); + intervals.reset(); int i = 0, pos; while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); - assertEquals(expected[id][i], intervals.end()); - i++; + assertEquals(expected[id][i + 1], intervals.end()); + i += 2; } assertEquals(expected[id].length, i); } + assertEquals(expected.length, matchedDocs); } + } + public void 
testTermQueryIntervals() throws IOException { + checkIntervals(new TermQuery(new Term("field1", "porridge")), "field1", new int[][]{ + {}, + { 1, 1, 4, 4, 7, 7 }, + { 1, 1, 4, 4, 7, 7 }, + {}, + { 1, 1, 4, 4, 7, 7 }, + { 0 } + }); } public void testOrderedNearIntervals() throws IOException { - - int[][] expected = new int[][]{ + checkIntervals(IntervalQuery.orderedNearQuery("field1", 100, + new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), + "field1", new int[][]{ {}, { 0, 2, 6, 17 }, { 3, 5, 6, 21 }, {}, { 0, 2, 6, 17 }, { } - }; - - Weight peaseWeight = searcher.createNormalizedWeight(new TermQuery(new Term("field1", "pease")), ScoreMode.COMPLETE); - Weight hotWeight = searcher.createNormalizedWeight(new TermQuery(new Term("field1", "hot")), ScoreMode.COMPLETE); - for (LeafReaderContext ctx : searcher.leafContexts) { - NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); - IntervalIterator intervals = Intervals.orderedIntervalIterator( - Arrays.asList(peaseWeight.intervals(ctx, "field1"), hotWeight.intervals(ctx, "field1")) - ); - for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { - ids.advance(doc); - int id = (int) ids.longValue(); - intervals.advanceTo(doc); - int i = 0, pos; - while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { - assertEquals(expected[id][i], pos); - assertEquals(expected[id][i], intervals.start()); - assertEquals(expected[id][i + 1], intervals.end()); - i += 2; - } - assertEquals(expected[id].length, i); - } - } - + }); } + public void testIntervalDisjunction() throws IOException { + checkIntervals(new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("field1", "hot")), BooleanClause.Occur.SHOULD) + .build(), "field1", new int[][]{ + {}, + { 0, 0, 2, 2, 3, 3, 6, 6, 17, 17}, + { 0, 0, 3, 3, 5, 5, 6, 6, 21, 21}, + { 3, 3 }, + { 0, 0, 2, 2, 3, 3, 6, 6, 17, 17}, + {} + }); + } } diff --git 
a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java index 80cd4da7cf0d..e6eeae907da8 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java @@ -112,6 +112,11 @@ public Collection getChildren() { return Collections.singletonList(new ChildScorer(in, "SHOULD")); } + @Override + public IntervalIterator intervals(String field) { + return in.intervals(field); + } + @Override public int docID() { return in.docID(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java index 9206b0484d4d..a8ef239d93cb 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java @@ -115,4 +115,9 @@ public long cost() { }; } + @Override + public IntervalIterator intervals(String field) { + return null; + } + } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java index fa113113f81a..b4e25a37d649 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java @@ -32,6 +32,7 @@ import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; @@ -311,7 +312,7 @@ public void collect(int doc) throws IOException { if (scorer == null) { Weight w = 
s.createNormalizedWeight(q, ScoreMode.COMPLETE); LeafReaderContext context = readerContextArray.get(leafPtr); - scorer = w.scorer(context); + scorer = w.scorer(context, PostingsEnum.FREQS); iterator = scorer.iterator(); } @@ -376,7 +377,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); LeafReaderContext ctx = (LeafReaderContext)indexSearcher.getTopReaderContext(); - Scorer scorer = w.scorer(ctx); + Scorer scorer = w.scorer(ctx, PostingsEnum.NONE); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -406,7 +407,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); LeafReaderContext ctx = previousReader.getContext(); - Scorer scorer = w.scorer(ctx); + Scorer scorer = w.scorer(ctx, PostingsEnum.NONE); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -444,7 +445,7 @@ public void collect(int doc) throws IOException { long startMS = System.currentTimeMillis(); for (int i=lastDoc[0]+1; i<=doc; i++) { Weight w = s.createNormalizedWeight(q, ScoreMode.COMPLETE); - Scorer scorer = w.scorer(context.get(leafPtr)); + Scorer scorer = w.scorer(context.get(leafPtr), PostingsEnum.FREQS); Assert.assertTrue("query collected "+doc+" but advance("+i+") says no more docs!",scorer.iterator().advance(i) != DocIdSetIterator.NO_MORE_DOCS); Assert.assertEquals("query collected "+doc+" but advance("+i+") got to "+scorer.docID(),doc,scorer.docID()); float advanceScore = scorer.score(); @@ -477,7 +478,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false); indexSearcher.setSimilarity(s.getSimilarity()); 
Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); - Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext()); + Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext(), PostingsEnum.NONE); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -505,7 +506,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false); indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); - Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext()); + Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext(), PostingsEnum.NONE); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -525,7 +526,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { public static void checkBulkScorerSkipTo(Random r, Query query, IndexSearcher searcher) throws IOException { Weight weight = searcher.createNormalizedWeight(query, ScoreMode.COMPLETE); for (LeafReaderContext context : searcher.getIndexReader().leaves()) { - final Scorer scorer = weight.scorer(context); + final Scorer scorer = weight.scorer(context, PostingsEnum.FREQS); final BulkScorer bulkScorer = weight.bulkScorer(context); if (scorer == null && bulkScorer == null) { continue; diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java b/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java index ae699130190d..dd2fd884f076 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java @@ -22,6 +22,7 @@ import org.apache.lucene.index.IndexReader; import 
org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.util.Bits; /** @@ -48,7 +49,7 @@ protected void search(List leaves, Weight weight, Collector c // we force the use of Scorer (not BulkScorer) to make sure // that the scorer passed to LeafCollector.setScorer supports // Scorer.getChildren - Scorer scorer = weight.scorer(ctx); + Scorer scorer = weight.scorer(ctx, collector.scoreMode().needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE); if (scorer != null) { final DocIdSetIterator iterator = scorer.iterator(); final LeafCollector leafCollector = collector.getLeafCollector(ctx); diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java index be04e0092092..6c94dee59bf9 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java @@ -68,8 +68,8 @@ public void extractTerms(Set terms) { } @Override - public SpanScorer scorer(LeafReaderContext context) throws IOException { - return in.scorer(context); + public SpanScorer scorer(LeafReaderContext context, short postings) throws IOException { + return in.scorer(context, postings); } @Override From 9e46d7a23d7889c6281403d44897441cba00d9c4 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 21 Feb 2018 14:09:02 +0000 Subject: [PATCH 40/83] Move intervals() back to Scorer Having it on Weight means duplicating loads of Scorer implementations to ensure that we always return the correct positions --- .../lucene/document/RangeFieldQuery.java | 8 +- .../SortedNumericDocValuesRangeQuery.java | 4 +- .../SortedSetDocValuesRangeQuery.java | 4 +- .../lucene/index/FrozenBufferedUpdates.java | 2 +- .../apache/lucene/search/BooleanQuery.java | 4 +- .../apache/lucene/search/BooleanWeight.java | 20 ++-- 
.../org/apache/lucene/search/BoostQuery.java | 4 +- .../lucene/search/ConstantScoreQuery.java | 12 +-- .../lucene/search/ConstantScoreScorer.java | 2 +- .../lucene/search/ConstantScoreWeight.java | 2 +- .../search/DisjunctionIntervalIterator.java | 102 ++++++++++++++++++ .../lucene/search/DisjunctionMaxQuery.java | 12 +-- .../lucene/search/DisjunctionScorer.java | 15 ++- .../search/DocValuesFieldExistsQuery.java | 4 +- .../lucene/search/DocValuesRewriteMethod.java | 4 +- .../lucene/search/DoubleValuesSource.java | 6 +- .../apache/lucene/search/FilterWeight.java | 6 +- .../lucene/search/IndexOrDocValuesQuery.java | 16 +-- .../apache/lucene/search/IndexSearcher.java | 6 +- .../apache/lucene/search/IntervalFilter.java | 4 +- .../lucene/search/IntervalIterator.java | 8 +- .../apache/lucene/search/IntervalQuery.java | 17 +-- .../apache/lucene/search/IntervalScorer.java | 38 ++++++- .../org/apache/lucene/search/Intervals.java | 95 +++------------- .../apache/lucene/search/LRUQueryCache.java | 16 +-- .../lucene/search/MatchAllDocsQuery.java | 4 +- .../lucene/search/MatchNoDocsQuery.java | 4 +- .../lucene/search/MultiPhraseQuery.java | 14 +-- .../MultiTermQueryConstantScoreWrapper.java | 8 +- .../lucene/search/NormsFieldExistsQuery.java | 4 +- .../org/apache/lucene/search/PhraseQuery.java | 14 +-- .../apache/lucene/search/PointInSetQuery.java | 4 +- .../apache/lucene/search/PointRangeQuery.java | 8 +- .../java/org/apache/lucene/search/Query.java | 49 ++++++++- .../apache/lucene/search/QueryRescorer.java | 2 +- .../apache/lucene/search/ReqOptSumScorer.java | 22 ++-- .../apache/lucene/search/SynonymQuery.java | 16 +-- .../apache/lucene/search/TermInSetQuery.java | 8 +- .../org/apache/lucene/search/TermQuery.java | 16 +-- .../org/apache/lucene/search/TermScorer.java | 6 +- .../java/org/apache/lucene/search/Weight.java | 14 +-- .../search/spans/FieldMaskingSpanQuery.java | 4 +- .../lucene/search/spans/SpanBoostQuery.java | 4 +- .../search/spans/SpanContainingQuery.java | 6 +- 
.../spans/SpanMultiTermQueryWrapper.java | 2 +- .../lucene/search/spans/SpanNearQuery.java | 6 +- .../lucene/search/spans/SpanNotQuery.java | 6 +- .../lucene/search/spans/SpanOrQuery.java | 4 +- .../search/spans/SpanPositionCheckQuery.java | 4 +- .../apache/lucene/search/spans/SpanQuery.java | 2 +- .../lucene/search/spans/SpanTermQuery.java | 2 +- .../lucene/search/spans/SpanWeight.java | 4 +- .../lucene/search/spans/SpanWithinQuery.java | 6 +- .../lucene/search/JustCompileSearch.java | 5 + .../search/TestBoolean2ScorerSupplier.java | 5 + .../lucene/search/TestBooleanScorer.java | 2 +- .../lucene/search/TestCachingCollector.java | 5 + .../lucene/search/TestConjunctionDISI.java | 5 + .../lucene/search/TestConstantScoreQuery.java | 4 +- .../apache/lucene/search/TestIntervals.java | 45 ++++---- .../lucene/search/TestLRUQueryCache.java | 10 +- .../lucene/search/TestMinShouldMatch2.java | 5 + .../apache/lucene/search/TestNeedsScores.java | 4 +- .../lucene/search/TestPositionIncrement.java | 4 +- .../TestPositiveScoresOnlyCollector.java | 7 +- .../lucene/search/TestQueryRescorer.java | 7 +- .../TestScoreCachingWrappingScorer.java | 7 +- .../apache/lucene/search/TestScorerPerf.java | 2 +- .../apache/lucene/search/TestSortRandom.java | 2 +- .../lucene/search/TestTopDocsCollector.java | 5 + .../lucene/search/TestTopFieldCollector.java | 7 +- .../TestUsageTrackingFilterCachingPolicy.java | 2 +- .../apache/lucene/search/TestWANDScorer.java | 4 +- .../search/spans/JustCompileSearchSpans.java | 2 +- .../spans/TestFieldMaskingSpanQuery.java | 14 +-- .../search/spans/TestNearSpansOrdered.java | 27 ++--- .../search/spans/TestSpanCollection.java | 7 +- .../search/spans/TestSpanContainQuery.java | 2 +- .../apache/lucene/search/spans/TestSpans.java | 10 +- .../lucene/search/AssertingIndexSearcher.java | 4 +- .../apache/lucene/search/AssertingQuery.java | 4 +- .../lucene/search/BlockScoreQueryWrapper.java | 8 +- .../org/apache/lucene/search/QueryUtils.java | 14 +-- 
.../search/RandomApproximationQuery.java | 9 +- .../lucene/search/ScorerIndexSearcher.java | 2 +- .../search/spans/AssertingSpanQuery.java | 4 +- .../search/spans/AssertingSpanWeight.java | 4 +- .../search/TestBaseExplanationTestCase.java | 4 +- 88 files changed, 560 insertions(+), 346 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java diff --git a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java index d507da963f4c..9b32a1506734 100644 --- a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java @@ -262,7 +262,7 @@ private void checkFieldInfo(FieldInfo fieldInfo) { } @Override - public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) { @@ -290,7 +290,7 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { LeafReader reader = context.reader(); PointValues values = reader.getPointValues(field); if (values == null) { @@ -350,8 +350,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context, postings); + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } diff --git 
a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java index 0c0efdf56660..03a7fa897339 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java @@ -93,7 +93,7 @@ public Query rewrite(IndexReader reader) throws IOException { abstract SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException; @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override @@ -102,7 +102,7 @@ public boolean isCacheable(LeafReaderContext ctx) { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { SortedNumericDocValues values = getValues(context.reader(), field); if (values == null) { return null; diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java index cd6cfadac7f6..69f542aa2e48 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java @@ -104,10 +104,10 @@ public Query rewrite(IndexReader reader) throws IOException { abstract SortedSetDocValues getValues(LeafReader reader, String field) throws IOException; @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, 
Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { SortedSetDocValues values = getValues(context.reader(), field); if (values == null) { return null; diff --git a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java index 8bfb19a97325..1f8974a510d0 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java @@ -687,7 +687,7 @@ private long applyQueryDeletes(BufferedUpdatesStream.SegmentState[] segStates) t final IndexSearcher searcher = new IndexSearcher(readerContext.reader()); searcher.setQueryCache(null); final Weight weight = searcher.createNormalizedWeight(query, ScoreMode.COMPLETE_NO_SCORES); - final Scorer scorer = weight.scorer(readerContext, PostingsEnum.NONE); + final Scorer scorer = weight.scorer(readerContext); if (scorer != null) { final DocIdSetIterator it = scorer.iterator(); diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index f52df9fb9cd8..f974dc04559b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -199,12 +199,12 @@ private BooleanQuery rewriteNoScoring() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { BooleanQuery query = this; if (scoreMode.needsScores() == false) { query = rewriteNoScoring(); } - return new 
BooleanWeight(query, searcher, scoreMode, boost); + return new BooleanWeight(query, searcher, scoreMode, minRequiredPostings, boost); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java index ea1ffed6594a..8de9394e2142 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java @@ -45,14 +45,14 @@ final class BooleanWeight extends Weight { final ArrayList weights; final ScoreMode scoreMode; - BooleanWeight(BooleanQuery query, IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + BooleanWeight(BooleanQuery query, IndexSearcher searcher, ScoreMode scoreMode, Query.Postings minRequiredPostings, float boost) throws IOException { super(query); this.query = query; this.scoreMode = scoreMode; this.similarity = searcher.getSimilarity(); weights = new ArrayList<>(); for (BooleanClause c : query) { - Weight w = searcher.createWeight(c.getQuery(), c.isScoring() ? scoreMode : ScoreMode.COMPLETE_NO_SCORES, boost); + Weight w = searcher.createWeight(c.getQuery(), c.isScoring() ? scoreMode : ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, boost); weights.add(w); } } @@ -113,7 +113,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio // contributions to the score to floats), so in order to make sure that // explanations have the same value as the score, we pull a scorer and // use it to compute the score. - Scorer scorer = scorer(context, PostingsEnum.NONE); + Scorer scorer = scorer(context); int advanced = scorer.iterator().advance(doc); assert advanced == doc; return Explanation.match(scorer.score(), "sum of:", subs); @@ -211,7 +211,7 @@ private BulkScorer requiredBulkScorer(LeafReaderContext context) throws IOExcept /** Try to build a boolean scorer for this weight. Returns null if {@link BooleanScorer} * cannot be used. 
*/ - BulkScorer booleanScorer(LeafReaderContext context, short postings) throws IOException { + BulkScorer booleanScorer(LeafReaderContext context) throws IOException { final int numOptionalClauses = query.getClauses(Occur.SHOULD).size(); final int numRequiredClauses = query.getClauses(Occur.MUST).size() + query.getClauses(Occur.FILTER).size(); @@ -263,7 +263,7 @@ BulkScorer booleanScorer(LeafReaderContext context, short postings) throws IOExc for (Weight w : weights) { BooleanClause c = cIter.next(); if (c.isProhibited()) { - Scorer scorer = w.scorer(context, postings); + Scorer scorer = w.scorer(context); if (scorer != null) { prohibited.add(scorer); } @@ -291,7 +291,7 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { // so that we can dynamically prune non-competitive hits. return super.bulkScorer(context); } - final BulkScorer bulkScorer = booleanScorer(context, PostingsEnum.NONE); + final BulkScorer bulkScorer = booleanScorer(context); if (bulkScorer != null) { // bulk scoring is applicable, use it return bulkScorer; @@ -302,8 +302,8 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context, postings); + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } @@ -326,7 +326,7 @@ public boolean isCacheable(LeafReaderContext ctx) { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { int minShouldMatch = query.getMinimumNumberShouldMatch(); final Map> scorers = new EnumMap<>(Occur.class); @@ -337,7 +337,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) Iterator 
cIter = query.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); - ScorerSupplier subScorer = w.scorerSupplier(context, postings); + ScorerSupplier subScorer = w.scorerSupplier(context); if (subScorer == null) { if (c.isRequired()) { return null; diff --git a/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java b/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java index 4e4649cb7100..860368240f77 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java @@ -116,8 +116,8 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return query.createWeight(searcher, scoreMode, BoostQuery.this.boost * boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return query.createWeight(searcher, scoreMode, minRequiredPostings, BoostQuery.this.boost * boost); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java index abdb85953dd8..269328720540 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -110,8 +110,8 @@ public long cost() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - final Weight innerWeight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 1f); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + final Weight innerWeight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, 1f); if (scoreMode.needsScores()) { return new 
ConstantScoreWeight(this, boost) { @@ -125,8 +125,8 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { - ScorerSupplier innerScorerSupplier = innerWeight.scorerSupplier(context, postings); + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + ScorerSupplier innerScorerSupplier = innerWeight.scorerSupplier(context); if (innerScorerSupplier == null) { return null; } @@ -159,8 +159,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context, postings); + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java index 56adcf390553..0040374b6147 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java @@ -65,7 +65,7 @@ public DocIdSetIterator iterator() { @Override public IntervalIterator intervals(String field) { - return null; + throw new UnsupportedOperationException(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java index 57316b9f7e4a..cdf4be94f3c8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java @@ -54,7 +54,7 @@ protected final float score() { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - final Scorer s = scorer(context, 
PostingsEnum.NONE); + final Scorer s = scorer(context); final boolean exists; if (s == null) { exists = false; diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java new file mode 100644 index 000000000000..57af8a28252b --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.util.PriorityQueue; + +abstract class DisjunctionIntervalIterator implements IntervalIterator { + + private final PriorityQueue queue; + private final IntervalIterator[] subIterators; + + IntervalIterator current; + + DisjunctionIntervalIterator(List subIterators) { + this.queue = new PriorityQueue(subIterators.size()) { + @Override + protected boolean lessThan(IntervalIterator a, IntervalIterator b) { + return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); + } + }; + this.subIterators = new IntervalIterator[subIterators.size()]; + + for (int i = 0; i < subIterators.size(); i++) { + this.subIterators[i] = subIterators.get(i); + } + } + + @Override + public int start() { + return current.start(); + } + + @Override + public int end() { + return current.end(); + } + + @Override + public int innerWidth() { + return current.innerWidth(); + } + + protected abstract void positionSubIntervals() throws IOException; + + @Override + public boolean reset(int doc) throws IOException { + positionSubIntervals(); + queue.clear(); + for (int i = 0; i < subIterators.length; i++) { + if (subIterators[i].reset(doc)) { + subIterators[i].nextInterval(); + queue.add(subIterators[i]); + } + } + current = null; + return queue.size() > 0; + } + + @Override + public int nextInterval() throws IOException { + if (current == null) { + current = queue.top(); + return current.start(); + } + int start = current.start(), end = current.end(); + while (queue.size() > 0 && contains(queue.top(), start, end)) { + IntervalIterator it = queue.pop(); + if (it != null && it.nextInterval() != Intervals.NO_MORE_INTERVALS) { + queue.add(it); + } + } + if (queue.size() == 0) { + current = IntervalIterator.EMPTY; + return Intervals.NO_MORE_INTERVALS; + } + current = queue.top(); + return current.start(); + } + + private boolean contains(IntervalIterator it, int start, 
int end) { + return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index 552dff1283a5..f79d2b9cfcda 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -103,10 +103,10 @@ protected class DisjunctionMaxWeight extends Weight { private final ScoreMode scoreMode; /** Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */ - public DisjunctionMaxWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public DisjunctionMaxWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { super(DisjunctionMaxQuery.this); for (Query disjunctQuery : disjuncts) { - weights.add(searcher.createWeight(disjunctQuery, scoreMode, boost)); + weights.add(searcher.createWeight(disjunctQuery, scoreMode, minRequiredPostings, boost)); } this.scoreMode = scoreMode; } @@ -120,11 +120,11 @@ public void extractTerms(Set terms) { /** Create the scorer used to score our associated DisjunctionMaxQuery */ @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { List scorers = new ArrayList<>(); for (Weight w : weights) { // we will advance() subscorers - Scorer subScorer = w.scorer(context, postings); + Scorer subScorer = w.scorer(context); if (subScorer != null) { scorers.add(subScorer); } @@ -189,8 +189,8 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio /** Create the Weight used to score us */ @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return 
new DisjunctionMaxWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return new DisjunctionMaxWeight(searcher, scoreMode, minRequiredPostings, boost); } /** Optimize our representation and our subqueries representations diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index f69fd936f453..9e5ab2813fd6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -182,7 +182,20 @@ public final float score() throws IOException { @Override public IntervalIterator intervals(String field) { - return null; // nocommit + List subIntervals = new ArrayList<>(); + for (DisiWrapper dw : subScorers) { + IntervalIterator subIt = dw.scorer.intervals(field); + if (subIt != null) + subIntervals.add(subIt); + } + if (subIntervals.size() == 0) + return null; + return new DisjunctionIntervalIterator(subIntervals) { + @Override + protected void positionSubIntervals() throws IOException { + getSubMatches(); + } + }; } /** Compute the score for the given linked list of scorers. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java index bbf9c3ced9a3..23fbaecbd981 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java @@ -62,10 +62,10 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { FieldInfos fieldInfos = context.reader().getFieldInfos(); FieldInfo fieldInfo = fieldInfos.fieldInfo(field); if (fieldInfo == null) { diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java index a141cbe64a72..6f4408599e5c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java @@ -72,10 +72,10 @@ public final int hashCode() { public final String getField() { return query.getField(); } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { final 
SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field); TermsEnum termsEnum = query.getTermsEnum(new Terms() { diff --git a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java index d951fc45db83..48579ccbee43 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java @@ -82,7 +82,7 @@ public Explanation explain(LeafReaderContext ctx, int docId, Explanation scoreEx * IndexReader-independent implementations can just return {@code this} * * Queries that use DoubleValuesSource objects should call rewrite() during - * {@link Query#createWeight(IndexSearcher, ScoreMode, float)} rather than during + * {@link Query#createWeight(IndexSearcher, ScoreMode, org.apache.lucene.search.Query.Postings, float)} rather than during * {@link Query#rewrite(IndexReader)} to avoid IndexReader reference leakage */ public abstract DoubleValuesSource rewrite(IndexSearcher reader) throws IOException; @@ -554,7 +554,7 @@ public boolean needsScores() { @Override public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException { - return new WeightDoubleValuesSource(searcher.rewrite(query).createWeight(searcher, ScoreMode.COMPLETE, 1f)); + return new WeightDoubleValuesSource(searcher.rewrite(query).createWeight(searcher, ScoreMode.COMPLETE, Query.Postings.NONE, 1f)); } @Override @@ -578,7 +578,7 @@ private WeightDoubleValuesSource(Weight weight) { @Override public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { - Scorer scorer = weight.scorer(ctx, PostingsEnum.NONE); + Scorer scorer = weight.scorer(ctx); if (scorer == null) return DoubleValues.EMPTY; DocIdSetIterator it = scorer.iterator(); diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java 
index 3ac351029097..278ad987a225 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java @@ -47,7 +47,7 @@ protected FilterWeight(Weight weight) { /** * Alternative constructor. * Use this variant only if the weight was not obtained - * via the {@link Query#createWeight(IndexSearcher, ScoreMode, float)} + * via the {@link Query#createWeight(IndexSearcher, ScoreMode, Query.Postings, float)} * method of the query object. */ protected FilterWeight(Query query, Weight weight) { @@ -71,8 +71,8 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { - return in.scorer(context, postings); + public Scorer scorer(LeafReaderContext context) throws IOException { + return in.scorer(context); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java index 50b04523baa3..3eb238254141 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java @@ -110,9 +110,9 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - final Weight indexWeight = indexQuery.createWeight(searcher, scoreMode, boost); - final Weight dvWeight = dvQuery.createWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + final Weight indexWeight = indexQuery.createWeight(searcher, scoreMode, minRequiredPostings, boost); + final Weight dvWeight = dvQuery.createWeight(searcher, scoreMode, minRequiredPostings, boost); return new Weight(this) { 
@Override public void extractTerms(Set terms) { @@ -133,9 +133,9 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { - final ScorerSupplier indexScorerSupplier = indexWeight.scorerSupplier(context, postings); - final ScorerSupplier dvScorerSupplier = dvWeight.scorerSupplier(context, postings); + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + final ScorerSupplier indexScorerSupplier = indexWeight.scorerSupplier(context); + final ScorerSupplier dvScorerSupplier = dvWeight.scorerSupplier(context); if (indexScorerSupplier == null || dvScorerSupplier == null) { return null; } @@ -162,8 +162,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context, postings); + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index da5ed036ddc0..ded001453857 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -686,7 +686,7 @@ protected Explanation explain(Weight weight, int doc) throws IOException { */ public Weight createNormalizedWeight(Query query, ScoreMode scoreMode) throws IOException { query = rewrite(query); - return createWeight(query, scoreMode, 1f); + return createWeight(query, scoreMode, Query.Postings.NONE, 1f); } /** @@ -694,9 +694,9 @@ public Weight createNormalizedWeight(Query query, ScoreMode scoreMode) throws IO * if possible and configured. 
* @lucene.experimental */ - public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(Query query, ScoreMode scoreMode, Query.Postings minRequiredPostings, float boost) throws IOException { final QueryCache queryCache = this.queryCache; - Weight weight = query.createWeight(this, scoreMode, boost); + Weight weight = query.createWeight(this, scoreMode, minRequiredPostings, boost); if (scoreMode.needsScores() == false && queryCache != null) { weight = queryCache.doCache(weight, queryCachingPolicy); } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index 852ac8070160..b2b930db570e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -55,7 +55,7 @@ public int innerWidth() { } @Override - public void reset() throws IOException { - in.reset(); + public boolean reset(int doc) throws IOException { + return in.reset(doc); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index a85c24011e1f..a65aa1d87c5c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -29,7 +29,7 @@ public interface IntervalIterator { int innerWidth(); - void reset() throws IOException; + boolean reset(int doc) throws IOException; int nextInterval() throws IOException; @@ -55,12 +55,12 @@ public int innerWidth() { } @Override - public void reset() { - + public boolean reset(int doc) { + return false; } @Override - public int nextInterval() throws IOException { + public int nextInterval() { return Intervals.NO_MORE_INTERVALS; } }; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java 
b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index 16de6fc9c1a8..1fe82eac5f75 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -57,10 +57,10 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { List subWeights = new ArrayList<>(); for (Query q : subQueries) { - subWeights.add(searcher.createWeight(q, ScoreMode.COMPLETE_NO_SCORES, boost)); + subWeights.add(searcher.createWeight(q, ScoreMode.COMPLETE, minRequiredPostings.atLeast(Postings.POSITIONS), boost)); } return new IntervalWeight(this, subWeights, buildSimScorer(searcher, subWeights), scoreMode); } @@ -112,7 +112,7 @@ public void extractTerms(Set terms) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, PostingsEnum.POSITIONS); + Scorer scorer = scorer(context); if (scorer != null && scorer.iterator().advance(doc) == doc) { return Explanation.match(scorer.score(), "Intervals match"); // nocommit improve this } @@ -120,13 +120,18 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { List subIntervals = new ArrayList<>(); List disis = new ArrayList<>(); for (Weight w : subWeights) { - Scorer scorer = w.scorer(context, PostingsEnum.POSITIONS); + Scorer scorer = w.scorer(context); + if (scorer == null) + return null; disis.add(scorer.iterator()); - subIntervals.add(scorer.intervals(field)); + IntervalIterator it = scorer.intervals(field); + if (it == null) + return null; + 
subIntervals.add(it); } IntervalIterator intervals = IntervalQuery.this.iteratorFunction.apply(subIntervals); LeafSimScorer leafScorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.POSITIVE_INFINITY); // nocommit diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index b25fa3e6f31e..e9cdc1aa4402 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -51,7 +51,40 @@ public float score() throws IOException { @Override public IntervalIterator intervals(String field) { if (this.field.equals(field)) - return intervals; + return new IntervalIterator() { + boolean started = false; + + @Override + public int start() { + return intervals.start(); + } + + @Override + public int end() { + return intervals.end(); + } + + @Override + public int innerWidth() { + return intervals.innerWidth(); + } + + @Override + public boolean reset(int doc) throws IOException { + // inner iterator already reset() in TwoPhaseIterator.matches() + started = false; + return true; + } + + @Override + public int nextInterval() throws IOException { + if (started == false) { + started = true; + return start(); + } + return intervals.nextInterval(); + } + }; return null; } @@ -65,8 +98,7 @@ public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { - intervals.reset(); - return intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; + return intervals.reset(approximation.docID()) && intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index d8cb9a98e71f..a001235aec0e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java 
+++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -21,7 +21,6 @@ import java.util.List; import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.util.PriorityQueue; public final class Intervals { @@ -78,8 +77,14 @@ public int innerWidth() { } @Override - public void reset() throws IOException { - upTo = pe.freq(); + public boolean reset(int doc) throws IOException { + if (pe.docID() == doc) { + upTo = pe.freq(); + pos = -1; + return true; + } + upTo = -1; + return false; } @Override @@ -134,13 +139,15 @@ public int innerWidth() { } @Override - public void reset() throws IOException { + public boolean reset(int doc) throws IOException { + boolean positioned = true; for (IntervalIterator it : subIntervals) { - it.reset(); + positioned &= it.reset(doc); } subIntervals.get(0).nextInterval(); i = 1; start = end = innerWidth = Integer.MIN_VALUE; + return positioned; } @Override @@ -171,82 +178,4 @@ public int nextInterval() throws IOException { } } - public static IntervalIterator or(List subIterators) { - return new DisjunctionIntervalIterator(subIterators); - } - - private static class DisjunctionIntervalIterator implements IntervalIterator { - - private final PriorityQueue queue; - private final IntervalIterator[] subIterators; - - IntervalIterator current; - - DisjunctionIntervalIterator(List subIterators) { - this.queue = new PriorityQueue(subIterators.size()) { - @Override - protected boolean lessThan(IntervalIterator a, IntervalIterator b) { - return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); - } - }; - this.subIterators = new IntervalIterator[subIterators.size()]; - - for (int i = 0; i < subIterators.size(); i++) { - this.subIterators[i] = subIterators.get(i); - } - } - - @Override - public int start() { - return current.start(); - } - - @Override - public int end() { - return current.end(); - } - - @Override - public int innerWidth() { - return current.innerWidth(); - } - - @Override - public void 
reset() throws IOException { - queue.clear(); - for (int i = 0; i < subIterators.length; i++) { - subIterators[i].reset(); - subIterators[i].nextInterval(); - queue.add(subIterators[i]); - } - current = null; - } - - @Override - public int nextInterval() throws IOException { - if (current == null) { - current = queue.top(); - return current.start(); - } - int start = current.start(), end = current.end(); - while (queue.size() > 0 && contains(queue.top(), start, end)) { - IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { - queue.add(it); - } - } - if (queue.size() == 0) { - current = IntervalIterator.EMPTY; - return NO_MORE_INTERVALS; - } - current = queue.top(); - return current.start(); - } - - private boolean contains(IntervalIterator it, int start, int end) { - return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); - } - - } - } diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java index 5b8cf917ba1d..beb73ad11159 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java @@ -715,31 +715,31 @@ private boolean shouldCache(LeafReaderContext context) throws IOException { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { if (used.compareAndSet(false, true)) { policy.onUse(getQuery()); } if (in.isCacheable(context) == false) { // this segment is not suitable for caching - return in.scorerSupplier(context, postings); + return in.scorerSupplier(context); } // Short-circuit: Check whether this segment is eligible for caching // before we take a lock because of #get if (shouldCache(context) == false) { - return in.scorerSupplier(context, postings); + return 
in.scorerSupplier(context); } // If the lock is already busy, prefer using the uncached version than waiting if (lock.tryLock() == false) { - return in.scorerSupplier(context, postings); + return in.scorerSupplier(context); } final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper(); if (cacheHelper == null) { // this reader has no cache helper - return in.scorerSupplier(context, postings); + return in.scorerSupplier(context); } DocIdSet docIdSet; try { @@ -749,7 +749,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) } if (docIdSet == null) { - ScorerSupplier inSupplier = in.scorerSupplier(context, postings); + ScorerSupplier inSupplier = in.scorerSupplier(context); if (inSupplier == null) { putIfAbsent(in.getQuery(), context, DocIdSet.EMPTY, cacheHelper); return null; @@ -809,8 +809,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context, postings); + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java index e878924a9f99..f11861820bd3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java @@ -29,14 +29,14 @@ public final class MatchAllDocsQuery extends Query { @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) { return new ConstantScoreWeight(this, boost) { @Override public String toString() { return "weight(" + MatchAllDocsQuery.this + ")"; } 
@Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc())); } diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java index d539c3d21453..74e8bdeaedda 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java @@ -42,7 +42,7 @@ public MatchNoDocsQuery(String reason) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new Weight(this) { @Override public void extractTerms(Set terms) { @@ -54,7 +54,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 23eb49621252..f6154bc30685 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -193,11 +193,13 @@ private class MultiPhraseWeight extends Weight { private final Similarity.SimScorer stats; private final Map termStates = new HashMap<>(); private final ScoreMode scoreMode; + private final Postings minRequiredPostings; - public MultiPhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) + public MultiPhraseWeight(IndexSearcher searcher, 
ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { super(MultiPhraseQuery.this); this.scoreMode = scoreMode; + this.minRequiredPostings = minRequiredPostings; this.similarity = searcher.getSimilarity(); final IndexReaderContext context = searcher.getTopReaderContext(); @@ -236,7 +238,7 @@ public void extractTerms(Set terms) { } @Override - public Scorer scorer(LeafReaderContext context, short pf) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { assert termArrays.length != 0; final LeafReader reader = context.reader(); @@ -265,7 +267,7 @@ public Scorer scorer(LeafReaderContext context, short pf) throws IOException { TermState termState = termStates.get(term).get(context); if (termState != null) { termsEnum.seekExact(term.bytes(), termState); - postings.add(termsEnum.postings(null, PostingsEnum.highest(pf, PostingsEnum.POSITIONS))); + postings.add(termsEnum.postings(null, minRequiredPostings.atLeast(Postings.POSITIONS).getRequiredPostings())); totalMatchCost += PhraseQuery.termPositionsCost(termsEnum); } } @@ -307,7 +309,7 @@ public boolean isCacheable(LeafReaderContext ctx) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, PostingsEnum.POSITIONS); + Scorer scorer = scorer(context); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { @@ -343,8 +345,8 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return new MultiPhraseWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return new MultiPhraseWeight(searcher, scoreMode, minRequiredPostings, boost); } /** Prints a user-readable version of this query. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java index cfad13791b1f..c2128927068d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java @@ -108,7 +108,7 @@ public final int hashCode() { public final String getField() { return query.getField(); } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { /** Try to collect terms from the given terms enum and return true iff all @@ -153,7 +153,7 @@ private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException { bq.add(new TermQuery(new Term(query.field, t.term), termStates), Occur.SHOULD); } Query q = new ConstantScoreQuery(bq.build()); - final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score()); + final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, minRequiredPostings, score()); return new WeightOrDocIdSet(weight); } @@ -203,10 +203,10 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { final WeightOrDocIdSet weightOrBitSet = rewrite(context); if (weightOrBitSet.weight != null) { - return weightOrBitSet.weight.scorer(context, postings); + return weightOrBitSet.weight.scorer(context); } else { return scorer(weightOrBitSet.set); } diff --git a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java 
b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java index 9e639247aec8..3382dda13902 100644 --- a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java @@ -62,10 +62,10 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { FieldInfos fieldInfos = context.reader().getFieldInfos(); FieldInfo fieldInfo = fieldInfos.fieldInfo(field); if (fieldInfo == null || fieldInfo.hasNorms() == false) { diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index b58f1ed868aa..af94a03a25e3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -353,9 +353,10 @@ private class PhraseWeight extends Weight { private final Similarity similarity; private final Similarity.SimScorer stats; private final ScoreMode scoreMode; + private final Postings minRequiredPostings; private transient TermStates states[]; - public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) + public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { super(PhraseQuery.this); final int[] positions = PhraseQuery.this.getPositions(); @@ -365,6 +366,7 @@ public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throw new IllegalStateException("PhraseWeight requires 
that the first position is 0, call rewrite first"); } this.scoreMode = scoreMode; + this.minRequiredPostings = minRequiredPostings; this.similarity = searcher.getSimilarity(); final IndexReaderContext context = searcher.getTopReaderContext(); states = new TermStates[terms.length]; @@ -396,7 +398,7 @@ public void extractTerms(Set queryTerms) { public String toString() { return "weight(" + PhraseQuery.this + ")"; } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { assert terms.length > 0; final LeafReader reader = context.reader(); PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.length]; @@ -422,7 +424,7 @@ public Scorer scorer(LeafReaderContext context, short postings) throws IOExcepti return null; } te.seekExact(t.bytes(), state); - PostingsEnum postingsEnum = te.postings(null, PostingsEnum.highest(postings, PostingsEnum.POSITIONS)); + PostingsEnum postingsEnum = te.postings(null, minRequiredPostings.atLeast(Postings.POSITIONS).getRequiredPostings()); postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t); totalMatchCost += termPositionsCost(te); } @@ -455,7 +457,7 @@ private boolean termNotInReader(LeafReader reader, Term term) throws IOException @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, PostingsEnum.POSITIONS); + Scorer scorer = scorer(context); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { @@ -510,8 +512,8 @@ static float termPositionsCost(TermsEnum termsEnum) throws IOException { @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return new PhraseWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return 
new PhraseWeight(searcher, scoreMode, minRequiredPostings, boost); } /** Prints a user-readable version of this query. */ diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java index 325e06060336..25095400c336 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java @@ -106,7 +106,7 @@ protected PointInSetQuery(String field, int numDims, int bytesPerDim, Stream pac } @Override - public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs". // This is an inverted structure and should be used in the first pass: @@ -114,7 +114,7 @@ public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, fl return new ConstantScoreWeight(this, boost) { @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { LeafReader reader = context.reader(); PointValues values = reader.getPointValues(field); diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 897eb5d989ea..683f737bde57 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -99,7 +99,7 @@ public static void checkArgs(String field, Object lowerPoint, Object upperPoint) } @Override - public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public final Weight 
createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs". // This is an inverted structure and should be used in the first pass: @@ -224,7 +224,7 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { } @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { LeafReader reader = context.reader(); PointValues values = reader.getPointValues(field); @@ -314,8 +314,8 @@ public long cost() { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { - ScorerSupplier scorerSupplier = scorerSupplier(context, postings); + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/Query.java b/lucene/core/src/java/org/apache/lucene/search/Query.java index 54de63fc02fd..aec1d9dc9d48 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Query.java +++ b/lucene/core/src/java/org/apache/lucene/search/Query.java @@ -20,6 +20,8 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.search.spans.SpanWeight; /** The abstract base class for queries.

    Instantiable subclasses are: @@ -43,6 +45,51 @@ */ public abstract class Query { + /** + * Enumeration defining what postings information should be retrieved from the + * index for a given Query + */ + public enum Postings { + NONE { + @Override + public int getRequiredPostings() { + return PostingsEnum.NONE; + } + }, + FREQS { + @Override + public int getRequiredPostings() { + return PostingsEnum.FREQS; + } + }, + POSITIONS { + @Override + public int getRequiredPostings() { + return PostingsEnum.POSITIONS; + } + }, + PAYLOADS { + @Override + public int getRequiredPostings() { + return PostingsEnum.PAYLOADS; + } + }, + OFFSETS { + @Override + public int getRequiredPostings() { + return PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS; + } + }; + + public abstract int getRequiredPostings(); + + public Postings atLeast(Postings postings) { + if (postings.compareTo(this) > 0) + return postings; + return this; + } + } + /** Prints a query to a string, with field assumed to be the * default field and omitted. */ @@ -62,7 +109,7 @@ public final String toString() { * @param scoreMode How the produced scorers will be consumed. * @param boost The boost that is propagated by the parent queries. 
*/ - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { throw new UnsupportedOperationException("Query " + this + " does not implement createWeight"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java index 054a23dad595..e98099691b09 100644 --- a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java @@ -83,7 +83,7 @@ public int compare(ScoreDoc a, ScoreDoc b) { if (readerContext != null) { // We advanced to another segment: docBase = readerContext.docBase; - scorer = weight.scorer(readerContext, PostingsEnum.FREQS); + scorer = weight.scorer(readerContext); } if (scorer != null) { diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java index d8be205f37de..5a502024d781 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java @@ -192,7 +192,12 @@ public IntervalIterator intervals(String field) { return reqIntervals; if (reqIntervals == null) return optIntervals; - return Intervals.or(Arrays.asList(reqIntervals, optIntervals)); + return new DisjunctionIntervalIterator(Arrays.asList(reqIntervals, optIntervals)) { + @Override + protected void positionSubIntervals() throws IOException { + positionOptionalScorers(); + } + }; } @Override @@ -203,9 +208,17 @@ public int docID() { @Override public float score() throws IOException { // TODO: sum into a double and cast to float if we ever send required clauses to BS1 - int curDoc = reqScorer.docID(); + positionOptionalScorers(); float score = reqScorer.score(); + if 
(optScorer.docID() == reqScorer.docID()) { + score += optScorer.score(); + } + + return score; + } + private void positionOptionalScorers() throws IOException { + int curDoc = reqScorer.docID(); int optScorerDoc = optApproximation.docID(); if (optScorerDoc < curDoc) { optScorerDoc = optApproximation.advance(curDoc); @@ -213,11 +226,6 @@ public float score() throws IOException { optScorerDoc = optApproximation.nextDoc(); } } - if (optScorerDoc == curDoc) { - score += optScorer.score(); - } - - return score; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java index 9fe7dbbbecf5..c9f44f09c681 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java @@ -112,16 +112,16 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { if (scoreMode.needsScores()) { - return new SynonymWeight(this, searcher, boost); + return new SynonymWeight(this, searcher, minRequiredPostings, boost); } else { // if scores are not needed, let BooleanWeight deal with optimizing that case. 
BooleanQuery.Builder bq = new BooleanQuery.Builder(); for (Term term : terms) { bq.add(new TermQuery(term), BooleanClause.Occur.SHOULD); } - return searcher.rewrite(bq.build()).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); + return searcher.rewrite(bq.build()).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, boost); } } @@ -129,8 +129,9 @@ class SynonymWeight extends Weight { private final TermStates termStates[]; private final Similarity similarity; private final Similarity.SimScorer simWeight; + private final Postings minRequiredPostings; - SynonymWeight(Query query, IndexSearcher searcher, float boost) throws IOException { + SynonymWeight(Query query, IndexSearcher searcher, Postings minRequiredPostings, float boost) throws IOException { super(query); CollectionStatistics collectionStats = searcher.collectionStatistics(terms[0].field()); long docFreq = 0; @@ -151,6 +152,7 @@ class SynonymWeight extends Weight { } else { this.simWeight = null; // no terms exist at all, we won't use similarity } + this.minRequiredPostings = minRequiredPostings; } @Override @@ -162,7 +164,7 @@ public void extractTerms(Set terms) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, PostingsEnum.FREQS); + Scorer scorer = scorer(context); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { @@ -188,7 +190,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { IndexOptions indexOptions = IndexOptions.NONE; if (terms.length > 0) { FieldInfo info = context.reader() @@ -209,7 +211,7 @@ public Scorer scorer(LeafReaderContext context, short postings) throws IOExcepti long termMaxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), 
termsEnum.docFreq()); totalMaxFreq += termMaxFreq; LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, termMaxFreq); - subScorers.add(new TermScorer(this, terms[i].field(), termsEnum, ScoreMode.COMPLETE, postings, simScorer)); + subScorers.add(new TermScorer(this, terms[i].field(), termsEnum, ScoreMode.COMPLETE, minRequiredPostings, simScorer)); } } if (subScorers.isEmpty()) { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java index 941e39227ca5..e08cada8d184 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java @@ -209,7 +209,7 @@ private static class WeightOrDocIdSet { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override @@ -273,7 +273,7 @@ private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException { bq.add(new TermQuery(new Term(t.field, t.term), termStates), Occur.SHOULD); } Query q = new ConstantScoreQuery(bq.build()); - final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score()); + final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, minRequiredPostings, score()); return new WeightOrDocIdSet(weight); } else { assert builder != null; @@ -309,12 +309,12 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { final WeightOrDocIdSet weightOrBitSet = rewrite(context); if (weightOrBitSet == null) { return null; } else if 
(weightOrBitSet.weight != null) { - return weightOrBitSet.weight.scorer(context, postings); + return weightOrBitSet.weight.scorer(context); } else { return scorer(weightOrBitSet.set); } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index d6e0386a2540..e2be41a7131f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -47,14 +47,16 @@ final class TermWeight extends Weight { private final Similarity.SimScorer simScorer; private final TermStates termStates; private final ScoreMode scoreMode; + private final Postings minRequiredPostings; - public TermWeight(IndexSearcher searcher, ScoreMode scoreMode, - float boost, TermStates termStates) throws IOException { + public TermWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, + float boost, TermStates termStates) throws IOException { super(TermQuery.this); if (scoreMode.needsScores() && termStates == null) { throw new IllegalStateException("termStates are required when scores are needed"); } this.scoreMode = scoreMode; + this.minRequiredPostings = minRequiredPostings; this.termStates = termStates; this.similarity = searcher.getSimilarity(); @@ -87,7 +89,7 @@ public String toString() { } @Override - public Scorer scorer(LeafReaderContext context, short postings) throws IOException { + public Scorer scorer(LeafReaderContext context) throws IOException { assert termStates == null || termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);; final TermsEnum termsEnum = getTermsEnum(context); if (termsEnum == null) { @@ -99,7 +101,7 @@ public Scorer scorer(LeafReaderContext context, short postings) throws IOExcepti .getIndexOptions(); float maxFreq = getMaxFreq(indexOptions, 
termsEnum.totalTermFreq(), termsEnum.docFreq()); LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), maxFreq); - return new TermScorer(this, getTerm().field(), termsEnum, scoreMode, postings, scorer); + return new TermScorer(this, getTerm().field(), termsEnum, scoreMode, minRequiredPostings, scorer); } private long getMaxFreq(IndexOptions indexOptions, long ttf, long df) { @@ -145,7 +147,7 @@ private boolean termNotInReader(LeafReader reader, Term term) throws IOException @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - TermScorer scorer = (TermScorer) scorer(context, PostingsEnum.FREQS); + TermScorer scorer = (TermScorer) scorer(context); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { @@ -186,7 +188,7 @@ public Term getTerm() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { final IndexReaderContext context = searcher.getTopReaderContext(); final TermStates termState; if (perReaderTermState == null @@ -197,7 +199,7 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo termState = this.perReaderTermState; } - return new TermWeight(searcher, scoreMode, boost, termState); + return new TermWeight(searcher, scoreMode, minRequiredPostings, boost, termState); } /** Prints a user-readable version of this query. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 7162d481444e..89efa028475a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -45,12 +45,12 @@ final class TermScorer extends Scorer { * @param docScorer * A {@link LeafSimScorer} for the appropriate field. */ - TermScorer(Weight weight, String field, TermsEnum te, ScoreMode scoreMode, short postings, LeafSimScorer docScorer) throws IOException { + TermScorer(Weight weight, String field, TermsEnum te, ScoreMode scoreMode, Query.Postings minRequiredPostings, LeafSimScorer docScorer) throws IOException { super(weight); this.docScorer = docScorer; this.field = field; if (scoreMode == ScoreMode.TOP_SCORES) { - impactsEnum = te.impacts(docScorer.getSimScorer(), PostingsEnum.highest(postings, PostingsEnum.FREQS)); + impactsEnum = te.impacts(docScorer.getSimScorer(), minRequiredPostings.atLeast(Query.Postings.FREQS).getRequiredPostings()); postingsEnum = impactsEnum; iterator = new DocIdSetIterator() { @@ -107,7 +107,7 @@ public long cost() { } }; } else { - short pf = PostingsEnum.highest(scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE, postings); + int pf = minRequiredPostings.atLeast(scoreMode.needsScores() ? 
Query.Postings.FREQS : Query.Postings.NONE).getRequiredPostings(); postingsEnum = te.postings(null, pf); impactsEnum = new SlowImpactsEnum(postingsEnum, docScorer.getSimScorer().score(Float.MAX_VALUE, 1)); iterator = postingsEnum; diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 98788582ae60..244056313f78 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -37,16 +37,16 @@ * {@link org.apache.lucene.index.LeafReader} dependent state should reside in the {@link Scorer}. *

    * Since {@link Weight} creates {@link Scorer} instances for a given - * {@link org.apache.lucene.index.LeafReaderContext} ({@link #scorer(org.apache.lucene.index.LeafReaderContext,short)}) + * {@link org.apache.lucene.index.LeafReaderContext} ({@link #scorer(org.apache.lucene.index.LeafReaderContext)}) * callers must maintain the relationship between the searcher's top-level * {@link IndexReaderContext} and the context used to create a {@link Scorer}. *

    * A Weight is used in the following way: *

      *
    1. A Weight is constructed by a top-level query, given a - * IndexSearcher ({@link Query#createWeight(IndexSearcher, ScoreMode, float)}). + * IndexSearcher ({@link Query#createWeight(IndexSearcher, ScoreMode, org.apache.lucene.search.Query.Postings, float)}). *
    2. A Scorer is constructed by - * {@link #scorer(org.apache.lucene.index.LeafReaderContext,short)}. + * {@link #scorer(org.apache.lucene.index.LeafReaderContext)}. *
    * * @since 2.9 @@ -101,7 +101,7 @@ public final Query getQuery() { * @return a {@link Scorer} which scores documents in/out-of order. * @throws IOException if there is a low-level I/O error */ - public abstract Scorer scorer(LeafReaderContext context, short postings) throws IOException; + public abstract Scorer scorer(LeafReaderContext context) throws IOException; /** * Optional method. @@ -110,8 +110,8 @@ public final Query getQuery() { * builds a {@link ScorerSupplier} wrapper around it. * @see #scorer */ - public ScorerSupplier scorerSupplier(LeafReaderContext context, short postings) throws IOException { - final Scorer scorer = scorer(context, postings); + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + final Scorer scorer = scorer(context); if (scorer == null) { return null; } @@ -145,7 +145,7 @@ public long cost() { */ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { - Scorer scorer = scorer(context, PostingsEnum.NONE); + Scorer scorer = scorer(context); if (scorer == null) { // No docs match return null; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java index 4a4c4fbae993..1abea327ec17 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java @@ -90,8 +90,8 @@ public SpanQuery getMaskedQuery() { // ...this is done to be more consistent with things like SpanFirstQuery @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return maskedQuery.createWeight(searcher, scoreMode, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return maskedQuery.createWeight(searcher, scoreMode, 
minRequiredPostings, boost); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java index 9556959a3ed2..2b600ffe8c41 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java @@ -109,8 +109,8 @@ public String getField() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return query.createWeight(searcher, scoreMode, SpanBoostQuery.this.boost * boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return query.createWeight(searcher, scoreMode, minRequiredPostings, SpanBoostQuery.this.boost * boost); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java index 63662994bf14..b408b39dcb93 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java @@ -44,9 +44,9 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight bigWeight = big.createWeight(searcher, scoreMode, boost); - SpanWeight littleWeight = little.createWeight(searcher, scoreMode, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + SpanWeight bigWeight = big.createWeight(searcher, scoreMode, minRequiredPostings, boost); + SpanWeight littleWeight = little.createWeight(searcher, scoreMode, minRequiredPostings, boost); return new SpanContainingWeight(searcher, 
scoreMode.needsScores() ? getTermStates(bigWeight, littleWeight) : null, bigWeight, littleWeight, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java index 088e73092de9..fd79ad60c16a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java @@ -96,7 +96,7 @@ public String getField() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { throw new IllegalArgumentException("Rewrite first!"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index 17b9e5151304..199f951aadb8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -178,10 +178,10 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { List subWeights = new ArrayList<>(); for (SpanQuery q : clauses) { - subWeights.add(q.createWeight(searcher, scoreMode, boost)); + subWeights.add(q.createWeight(searcher, scoreMode, minRequiredPostings, boost)); } return new SpanNearWeight(subWeights, searcher, scoreMode.needsScores() ? 
getTermStates(subWeights) : null, boost); } @@ -307,7 +307,7 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new SpanGapWeight(searcher, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index 6c56df3abee6..e8c74f33763a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -98,9 +98,9 @@ public String toString(String field) { @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight includeWeight = include.createWeight(searcher, scoreMode, boost); - SpanWeight excludeWeight = exclude.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + SpanWeight includeWeight = include.createWeight(searcher, scoreMode, minRequiredPostings, boost); + SpanWeight excludeWeight = exclude.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, boost); return new SpanNotWeight(searcher, scoreMode.needsScores() ? 
getTermStates(includeWeight) : null, includeWeight, excludeWeight, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index 849edaa30e6e..fb0f0aac7dce 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -116,10 +116,10 @@ public int hashCode() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { List subWeights = new ArrayList<>(clauses.size()); for (SpanQuery q : clauses) { - subWeights.add(q.createWeight(searcher, scoreMode, boost)); + subWeights.add(q.createWeight(searcher, scoreMode, minRequiredPostings, boost)); } return new SpanOrWeight(searcher, scoreMode.needsScores() ? 
getTermStates(subWeights) : null, subWeights, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java index 099b627e1ee3..75aecc0a1fe0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java @@ -68,8 +68,8 @@ public SpanPositionCheckQuery(SpanQuery match) { protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException; @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight matchWeight = match.createWeight(searcher, scoreMode, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + SpanWeight matchWeight = match.createWeight(searcher, scoreMode, minRequiredPostings, boost); return new SpanPositionCheckWeight(matchWeight, searcher, scoreMode.needsScores() ? 
getTermStates(matchWeight) : null, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java index ca657b6cff1f..b50010fd8b85 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java @@ -37,7 +37,7 @@ public abstract class SpanQuery extends Query { public abstract String getField(); @Override - public abstract SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException; + public abstract SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException; /** * Build a map of terms to {@link TermStates}, for use in constructing SpanWeights diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index 9ac7afb81ee3..5d8ad6400fac 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -65,7 +65,7 @@ public SpanTermQuery(Term term, TermStates termStates) { public String getField() { return term.field(); } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { final TermStates context; final IndexReaderContext topContext = searcher.getTopReaderContext(); if (termStates == null || termStates.wasBuiltFor(topContext) == false) { diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index d24887f3eb8c..25b58fdc39a0 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -124,7 +124,7 @@ private Similarity.SimScorer buildSimWeight(SpanQuery query, IndexSearcher searc public abstract Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) throws IOException; @Override - public SpanScorer scorer(LeafReaderContext context, short postings) throws IOException { + public SpanScorer scorer(LeafReaderContext context) throws IOException { final Spans spans = getSpans(context, Postings.POSITIONS); if (spans == null) { return null; @@ -145,7 +145,7 @@ public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - SpanScorer scorer = scorer(context, PostingsEnum.POSITIONS); + SpanScorer scorer = scorer(context); if (scorer != null) { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java index fba85fe6e86a..7f29612cc710 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java @@ -45,9 +45,9 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight bigWeight = big.createWeight(searcher, scoreMode, boost); - SpanWeight littleWeight = little.createWeight(searcher, scoreMode, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + SpanWeight bigWeight = big.createWeight(searcher, scoreMode, minRequiredPostings, boost); + SpanWeight littleWeight = little.createWeight(searcher, scoreMode, 
minRequiredPostings, boost); return new SpanWithinWeight(searcher, scoreMode.needsScores() ? getTermStates(bigWeight, littleWeight) : null, bigWeight, littleWeight, boost); } diff --git a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java index 1657f9b9ced1..f80bd5d82d63 100644 --- a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java +++ b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -189,6 +189,11 @@ public int docID() { public DocIdSetIterator iterator() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } + + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } } static final class JustCompileSimilarity extends Similarity { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java index 3118fa85394c..21b2ea3285f2 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java @@ -59,6 +59,11 @@ public DocIdSetIterator iterator() { return it; } + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } + @Override public String toString() { return "FakeScorer(cost=" + it.cost() + ")"; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java index 8a8379be3432..dab8e7923328 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java @@ -77,7 +77,7 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) 
throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new Weight(CrazyMustUseBulkScorerQuery.this) { @Override public void extractTerms(Set terms) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java index 12136b5b318a..3933b07e02c8 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java @@ -44,6 +44,11 @@ private MockScorer() { public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); } + + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } } private static class NoOpCollector extends SimpleCollector { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java index 083ac248df91..f105216baae5 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java @@ -150,6 +150,11 @@ public float score() throws IOException { public float getMaxScore(int upTo) throws IOException { return 0; } + + @Override + public IntervalIterator intervals(String field) { + return null; + } }; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java index f3382a5bf6c7..86c92f7cb937 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java @@ -135,8 +135,8 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, 
float boost) throws IOException { - return in.createWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return in.createWeight(searcher, scoreMode, minRequiredPostings, boost); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index fb3369de885a..53f589c90117 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Arrays; +import com.carrotsearch.randomizedtesting.annotations.Seed; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; @@ -39,6 +40,7 @@ import org.junit.AfterClass; import org.junit.BeforeClass; +@Seed("98A904E565FC8F70:BF2EBE6100A16015") public class TestIntervals extends LuceneTestCase { private static String field1_docs[] = { @@ -83,48 +85,55 @@ public static void teardownIndex() throws IOException { IOUtils.close(searcher.getIndexReader(), directory); } - private void checkIntervals(Query query, String field, int[][] expected) throws IOException { - Weight weight = searcher.createNormalizedWeight(query, ScoreMode.COMPLETE); + private void checkIntervals(Query query, String field, int expectedMatchCount, int[][] expected) throws IOException { + Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE, Query.Postings.POSITIONS, 1f); + int matchedDocs = 0; for (LeafReaderContext ctx : searcher.leafContexts) { - Scorer scorer = weight.scorer(ctx, PostingsEnum.POSITIONS); + Scorer scorer = weight.scorer(ctx); + if (scorer == null) + continue; assertNull(scorer.intervals(field + "1")); NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); - IntervalIterator 
intervals = scorer.intervals("field1"); + IntervalIterator intervals = scorer.intervals(field); DocIdSetIterator it = scorer.iterator(); - int matchedDocs = 0; for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { matchedDocs++; ids.advance(doc); int id = (int) ids.longValue(); - intervals.reset(); - int i = 0, pos; - while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { - assertEquals(expected[id][i], pos); - assertEquals(expected[id][i], intervals.start()); - assertEquals(expected[id][i + 1], intervals.end()); - i += 2; + System.out.println(id); + if (intervals.reset(doc)) { + int i = 0, pos; + while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { + assertEquals(expected[id][i], pos); + assertEquals(expected[id][i], intervals.start()); + assertEquals(expected[id][i + 1], intervals.end()); + i += 2; + } + assertEquals(expected[id].length, i); + } + else { + assertEquals(0, expected[id].length); } - assertEquals(expected[id].length, i); } - assertEquals(expected.length, matchedDocs); } + assertEquals(expectedMatchCount, matchedDocs); } public void testTermQueryIntervals() throws IOException { - checkIntervals(new TermQuery(new Term("field1", "porridge")), "field1", new int[][]{ + checkIntervals(new TermQuery(new Term("field1", "porridge")), "field1", 4, new int[][]{ {}, { 1, 1, 4, 4, 7, 7 }, { 1, 1, 4, 4, 7, 7 }, {}, { 1, 1, 4, 4, 7, 7 }, - { 0 } + { 0, 0 } }); } public void testOrderedNearIntervals() throws IOException { checkIntervals(IntervalQuery.orderedNearQuery("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), - "field1", new int[][]{ + "field1", 3, new int[][]{ {}, { 0, 2, 6, 17 }, { 3, 5, 6, 21 }, @@ -138,7 +147,7 @@ public void testIntervalDisjunction() throws IOException { checkIntervals(new BooleanQuery.Builder() .add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("field1", 
"hot")), BooleanClause.Occur.SHOULD) - .build(), "field1", new int[][]{ + .build(), "field1", 4, new int[][]{ {}, { 0, 0, 2, 2, 3, 3, 6, 6, 17, 17}, { 0, 0, 3, 3, 5, 5, 6, 6, 21, 21}, diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java index 9de9b73f4d8e..89f75c24b7fa 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java @@ -347,7 +347,7 @@ private static class DummyQuery extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { @@ -940,7 +940,7 @@ private static class BadQuery extends Query { int[] i = new int[] {42}; // an array so that clone keeps the reference @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { @@ -1274,7 +1274,7 @@ public void testReaderNotSuitedForCaching() throws IOException { private static class NoCacheQuery extends Query { @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new Weight(this) { @Override public void extractTerms(Set terms) { @@ -1351,7 +1351,7 @@ private 
static class DummyQuery2 extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { @@ -1450,7 +1450,7 @@ public int hashCode() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, 1) { @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java index f60435c57a30..0f90b1c18a94 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java @@ -402,5 +402,10 @@ public int docID() { } }; } + + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java b/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java index 9352f72f97b7..0cb4462a49dc 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java @@ -100,8 +100,8 @@ static class AssertNeedsScores extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - final Weight w = in.createWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float 
boost) throws IOException { + final Weight w = in.createWeight(searcher, scoreMode, minRequiredPostings, boost); return new FilterWeight(w) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java index 9348862387d6..64db26e3d351 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java @@ -252,7 +252,7 @@ public void testPayloadsPos0() throws Exception { System.out.println("\ngetPayloadSpans test"); } PayloadSpanCollector collector = new PayloadSpanCollector(); - Spans pspans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS); + Spans pspans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.PAYLOADS, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS); while (pspans.nextDoc() != Spans.NO_MORE_DOCS) { while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { if (VERBOSE) { @@ -274,7 +274,7 @@ public void testPayloadsPos0() throws Exception { assertEquals(8, count); // System.out.println("\ngetSpans test"); - Spans spans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); count = 0; sawZero = false; while (spans.nextDoc() != Spans.NO_MORE_DOCS) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java index 9fbd6a46b56f..9b1460bb7d0e 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java @@ -69,6 +69,11 @@ public long cost() { } }; } + + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } } // The scores must have positive as well as negative values @@ -97,7 +102,7 @@ public void testNegativeScores() throws Exception { IndexReader ir = writer.getReader(); writer.close(); IndexSearcher searcher = newSearcher(ir); - Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, 1f); + Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, Query.Postings.NONE, 1f); Scorer s = new SimpleScorer(fake); TopDocsCollector tdc = TopScoreDocCollector.create(scores.length); Collector c = new PositiveScoresOnlyCollector(tdc); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java index d1f307d063ec..9ba43ae6b6ec 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java @@ -418,7 +418,7 @@ public FixedScoreQuery(int[] idToNum, boolean reverse) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new Weight(FixedScoreQuery.this) { @@ -437,6 +437,11 @@ public int docID() { return docID; } + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } + @Override public DocIdSetIterator iterator() { return new DocIdSetIterator() { diff --git 
a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java index 257310176740..53ecd49b6984 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java @@ -50,6 +50,11 @@ public float getMaxScore(int upTo) throws IOException { @Override public int docID() { return doc; } + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } + @Override public DocIdSetIterator iterator() { return new DocIdSetIterator() { @@ -117,7 +122,7 @@ public void testGetScores() throws Exception { IndexReader ir = writer.getReader(); writer.close(); IndexSearcher searcher = newSearcher(ir); - Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, 1f); + Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, Query.Postings.FREQS, 1f); Scorer s = new SimpleScorer(fake); ScoreCachingCollector scc = new ScoreCachingCollector(scores.length); scc.setScorer(s); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java b/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java index 59a246cb6647..22b42f8493b2 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java @@ -149,7 +149,7 @@ private static class BitSetQuery extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { 
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java b/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java index 05b016c31c35..8afaa2db8958 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java @@ -229,7 +229,7 @@ public RandomQuery(long seed, float density, List docValues) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java index 17c5f85dd898..c0f6b2401cb1 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java @@ -245,6 +245,11 @@ public float getMaxScore(int upTo) throws IOException { public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); } + + @Override + public IntervalIterator intervals(String field) { + return null; + } } public void testSetMinCompetitiveScore() throws Exception { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java index f12e9100d656..6600b6ee92e4 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java @@ -264,7 +264,12 @@ public int docID() { public DocIdSetIterator iterator() { return scorer.iterator(); } - + + @Override + public IntervalIterator intervals(String field) { + 
return scorer.intervals(field); + } + }; super.setScorer(s); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java index 670df770e67c..8f2bcf5bdceb 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java @@ -118,7 +118,7 @@ public int hashCode() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { return new ConstantScoreWeight(DummyQuery.this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java index 5367dbcd3f05..0bffdc7ffb4d 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java @@ -336,8 +336,8 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return new FilterWeight(query.createWeight(searcher, scoreMode, boost)) { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return new FilterWeight(query.createWeight(searcher, scoreMode, minRequiredPostings, boost)) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { Scorer scorer = super.scorer(context); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java 
b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java index 3244c1d5ef81..92e99abb2ad7 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java @@ -94,7 +94,7 @@ public String getField() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java index f72ea664b937..74c9fee00668 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java @@ -143,7 +143,7 @@ public void testRewrite0() throws Exception { QueryUtils.checkEqual(q, qr); Set terms = new HashSet<>(); - qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).extractTerms(terms); + qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).extractTerms(terms); assertEquals(1, terms.size()); } @@ -163,7 +163,7 @@ public Query rewrite(IndexReader reader) { QueryUtils.checkUnequal(q, qr); Set terms = new HashSet<>(); - qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).extractTerms(terms); + qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).extractTerms(terms); assertEquals(2, terms.size()); } @@ -177,7 +177,7 @@ public void testRewrite2() throws Exception { QueryUtils.checkEqual(q, qr); HashSet set = new HashSet<>(); - qr.createWeight(searcher, ScoreMode.COMPLETE, 1f).extractTerms(set); + qr.createWeight(searcher, ScoreMode.COMPLETE, 
Query.Postings.NONE, 1f).extractTerms(set); assertEquals(2, set.size()); } @@ -253,7 +253,7 @@ public void testSpans0() throws Exception { SpanQuery q = new SpanOrQuery(q1, new FieldMaskingSpanQuery(q2, "gender")); check(q, new int[] { 0, 1, 2, 3, 4 }); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(span, 0,0,1); assertNext(span, 1,0,1); assertNext(span, 1,1,2); @@ -275,8 +275,8 @@ public void testSpans1() throws Exception { check(qA, new int[] { 0, 1, 2, 4 }); check(qB, new int[] { 0, 1, 2, 4 }); - Spans spanA = qA.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); - Spans spanB = qB.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spanA = qA.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spanB = qB.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); while (spanA.nextDoc() != Spans.NO_MORE_DOCS) { assertNotSame("spanB not still going", Spans.NO_MORE_DOCS, spanB.nextDoc()); @@ -301,7 +301,7 @@ public void testSpans2() throws Exception { new FieldMaskingSpanQuery(qB, "id") }, -1, false ); check(q, new int[] { 0, 1, 2, 3 }); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 
Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(span, 0,0,1); assertNext(span, 1,1,2); assertNext(span, 2,0,1); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java index 072d3818490a..d38db81b19bd 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java @@ -28,6 +28,7 @@ import org.apache.lucene.search.CheckHits; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TopDocs; @@ -122,7 +123,7 @@ public String s(int doc, int start, int end) { public void testNearSpansNext() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(span,0,0,3); assertNext(span,1,0,4); assertFinished(span); @@ -135,7 +136,7 @@ public void testNearSpansNext() throws Exception { */ public void testNearSpansAdvanceLikeNext() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(0, span.advance(0)); assertEquals(0, 
span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -147,7 +148,7 @@ public void testNearSpansAdvanceLikeNext() throws Exception { public void testNearSpansNextThenAdvance() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc()); assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -159,7 +160,7 @@ public void testNearSpansNextThenAdvance() throws Exception { public void testNearSpansNextThenAdvancePast() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc()); assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -168,13 +169,13 @@ public void testNearSpansNextThenAdvancePast() throws Exception { public void testNearSpansAdvancePast() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(Spans.NO_MORE_DOCS, span.advance(2)); } public void testNearSpansAdvanceTo0() throws Exception { SpanNearQuery q = 
makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(0, span.advance(0)); assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -182,7 +183,7 @@ public void testNearSpansAdvanceTo0() throws Exception { public void testNearSpansAdvanceTo1() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(1, span.advance(1)); assertEquals(0, span.nextStartPosition()); assertEquals(s(1,0,4), s(span)); @@ -221,7 +222,7 @@ public void testOrderedSpanIteration() throws Exception { new SpanOrQuery(new SpanTermQuery(new Term(FIELD, "w1")), new SpanTermQuery(new Term(FIELD, "w2"))), new SpanTermQuery(new Term(FIELD, "w4")) }, 10, true); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans,0,0,4); assertNext(spans,0,1,4); assertFinished(spans); @@ -231,7 +232,7 @@ public void testOrderedSpanIterationSameTerms1() throws Exception { SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ new SpanTermQuery(new Term(FIELD, "t1")), new SpanTermQuery(new Term(FIELD, "t2")) }, 1, true); - Spans spans = 
q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans,4,0,2); assertFinished(spans); } @@ -240,7 +241,7 @@ public void testOrderedSpanIterationSameTerms2() throws Exception { SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ new SpanTermQuery(new Term(FIELD, "t2")), new SpanTermQuery(new Term(FIELD, "t1")) }, 1, true); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans,4,1,4); assertNext(spans,4,2,4); assertFinished(spans); @@ -264,7 +265,7 @@ public void testGaps() throws Exception { .addGap(1) .addClause(new SpanTermQuery(new Term(FIELD, "w2"))) .build(); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 1, 0, 3); assertNext(spans, 2, 0, 3); assertFinished(spans); @@ -277,7 +278,7 @@ public void testGaps() throws Exception { .addClause(new SpanTermQuery(new Term(FIELD, "w3"))) .setSlop(1) .build(); - spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 
1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 2, 0, 5); assertNext(spans, 3, 0, 6); assertFinished(spans); @@ -289,7 +290,7 @@ public void testMultipleGaps() throws Exception { .addGap(2) .addClause(new SpanTermQuery(new Term(FIELD, "g"))) .build(); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 5, 0, 4); assertNext(spans, 5, 9, 13); assertFinished(spans); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java index ff9327526d22..6c2d28c5c0b1 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java @@ -31,6 +31,7 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -120,7 +121,7 @@ public void testNestedNearQuery() throws IOException { SpanNearQuery q7 = new SpanNearQuery(new SpanQuery[]{q1, q6}, 1, true); TermCollector collector = new TermCollector(); - Spans spans = q7.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q7.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(0, 
spans.advance(0)); spans.nextStartPosition(); checkCollectedTerms(spans, collector, new Term(FIELD, "w1"), new Term(FIELD, "w2"), new Term(FIELD, "w3")); @@ -140,7 +141,7 @@ public void testOrQuery() throws IOException { SpanOrQuery orQuery = new SpanOrQuery(q2, q3); TermCollector collector = new TermCollector(); - Spans spans = orQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = orQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(1, spans.advance(1)); spans.nextStartPosition(); @@ -170,7 +171,7 @@ public void testSpanNotQuery() throws IOException { SpanNotQuery notq = new SpanNotQuery(nq, q3); TermCollector collector = new TermCollector(); - Spans spans = notq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = notq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(2, spans.advance(2)); spans.nextStartPosition(); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java index b4cad767706d..f7b408877bd5 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java @@ -73,7 +73,7 @@ void checkHits(Query query, int[] results) throws Exception { } Spans makeSpans(SpanQuery sq) throws Exception { - return sq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + return 
sq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); } void tstEqualSpans(String mes, SpanQuery expectedQ, SpanQuery actualQ) throws Exception { diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java index 151c8ee16108..d8b9f9216a8a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java @@ -194,7 +194,7 @@ public void testSpanNearOrderedEqual15() throws Exception { public void testSpanNearOrderedOverlap() throws Exception { final SpanQuery query = spanNearOrderedQuery(field, 1, "t1", "t2", "t3"); - Spans spans = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals("first doc", 11, spans.nextDoc()); assertEquals("first start", 0, spans.nextStartPosition()); @@ -209,7 +209,7 @@ public void testSpanNearOrderedOverlap() throws Exception { public void testSpanNearUnOrdered() throws Exception { //See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 for discussion about this test SpanQuery senq = spanNearUnorderedQuery(field, 0, "u1", "u2"); - Spans spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 4, 1, 3); assertNext(spans, 5, 2, 4); assertNext(spans, 8, 2, 4); 
@@ -218,7 +218,7 @@ public void testSpanNearUnOrdered() throws Exception { assertFinished(spans); senq = spanNearUnorderedQuery(1, senq, spanTermQuery(field, "u2")); - spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 4, 0, 3); assertNext(spans, 4, 1, 3); // unordered spans can be subsets assertNext(spans, 5, 0, 4); @@ -232,7 +232,7 @@ public void testSpanNearUnOrdered() throws Exception { } private Spans orSpans(String[] terms) throws Exception { - return spanOrQuery(field, terms).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + return spanOrQuery(field, terms).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); } public void testSpanOrEmpty() throws Exception { @@ -414,7 +414,7 @@ private int spanCount(String include, int slop, String exclude, int pre, int pos SpanQuery iq = includeTerms.length == 1 ? 
spanTermQuery(field, include) : spanNearOrderedQuery(field, slop, includeTerms); SpanQuery eq = spanTermQuery(field, exclude); SpanQuery snq = spanNotQuery(iq, eq, pre, post); - Spans spans = snq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = snq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); int i = 0; if (spans != null) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java index 2a6376df1f17..4d98f89017a2 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java @@ -52,9 +52,9 @@ public AssertingIndexSearcher(Random random, IndexReaderContext context, Execut } @Override - public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(Query query, ScoreMode scoreMode, Query.Postings minRequiredPostings, float boost) throws IOException { // this adds assertions to the inner weights/scorers too - return new AssertingWeight(random, super.createWeight(query, scoreMode, boost), scoreMode); + return new AssertingWeight(random, super.createWeight(query, scoreMode, minRequiredPostings, boost), scoreMode); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java index b3d2f8116c44..e136eaa1023e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java @@ -39,9 +39,9 @@ public static Query wrap(Random 
random, Query query) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { assert boost >= 0; - return new AssertingWeight(new Random(random.nextLong()), in.createWeight(searcher, scoreMode, boost), scoreMode); + return new AssertingWeight(new Random(random.nextLong()), in.createWeight(searcher, scoreMode, minRequiredPostings, boost), scoreMode); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java index 3b9a740a448f..98e56a255875 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java @@ -73,8 +73,8 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - final Weight inWeight = query.createWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + final Weight inWeight = query.createWeight(searcher, scoreMode, minRequiredPostings, boost); if (scoreMode.needsScores() == false) { return inWeight; } @@ -196,6 +196,10 @@ public float getMaxScore(int upTo) throws IOException { return max; } + @Override + public IntervalIterator intervals(String field) { + throw new UnsupportedOperationException(); + } }; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java index b4e25a37d649..71592a8a3701 100644 --- 
a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java @@ -312,7 +312,7 @@ public void collect(int doc) throws IOException { if (scorer == null) { Weight w = s.createNormalizedWeight(q, ScoreMode.COMPLETE); LeafReaderContext context = readerContextArray.get(leafPtr); - scorer = w.scorer(context, PostingsEnum.FREQS); + scorer = w.scorer(context); iterator = scorer.iterator(); } @@ -377,7 +377,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); LeafReaderContext ctx = (LeafReaderContext)indexSearcher.getTopReaderContext(); - Scorer scorer = w.scorer(ctx, PostingsEnum.NONE); + Scorer scorer = w.scorer(ctx); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -407,7 +407,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); LeafReaderContext ctx = previousReader.getContext(); - Scorer scorer = w.scorer(ctx, PostingsEnum.NONE); + Scorer scorer = w.scorer(ctx); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -445,7 +445,7 @@ public void collect(int doc) throws IOException { long startMS = System.currentTimeMillis(); for (int i=lastDoc[0]+1; i<=doc; i++) { Weight w = s.createNormalizedWeight(q, ScoreMode.COMPLETE); - Scorer scorer = w.scorer(context.get(leafPtr), PostingsEnum.FREQS); + Scorer scorer = w.scorer(context.get(leafPtr)); Assert.assertTrue("query collected "+doc+" but advance("+i+") says no more docs!",scorer.iterator().advance(i) != DocIdSetIterator.NO_MORE_DOCS); Assert.assertEquals("query collected "+doc+" but advance("+i+") got to "+scorer.docID(),doc,scorer.docID()); float 
advanceScore = scorer.score(); @@ -478,7 +478,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false); indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); - Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext(), PostingsEnum.NONE); + Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext()); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -506,7 +506,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false); indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); - Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext(), PostingsEnum.NONE); + Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext()); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean more = false; @@ -526,7 +526,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { public static void checkBulkScorerSkipTo(Random r, Query query, IndexSearcher searcher) throws IOException { Weight weight = searcher.createNormalizedWeight(query, ScoreMode.COMPLETE); for (LeafReaderContext context : searcher.getIndexReader().leaves()) { - final Scorer scorer = weight.scorer(context, PostingsEnum.FREQS); + final Scorer scorer = weight.scorer(context); final BulkScorer bulkScorer = weight.bulkScorer(context); if (scorer == null && bulkScorer == null) { continue; diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java index a050b50401cb..8c408b17276c 100644 
--- a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java @@ -62,8 +62,8 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - final Weight weight = query.createWeight(searcher, scoreMode, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + final Weight weight = query.createWeight(searcher, scoreMode, minRequiredPostings, boost); return new RandomApproximationWeight(weight, new Random(random.nextLong())); } @@ -108,6 +108,11 @@ public float score() throws IOException { return scorer.score(); } + @Override + public IntervalIterator intervals(String field) { + return scorer.intervals(field); + } + @Override public int advanceShallow(int target) throws IOException { return scorer.advanceShallow(target); diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java b/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java index dd2fd884f076..97c5c7a1338e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java @@ -49,7 +49,7 @@ protected void search(List leaves, Weight weight, Collector c // we force the use of Scorer (not BulkScorer) to make sure // that the scorer passed to LeafCollector.setScorer supports // Scorer.getChildren - Scorer scorer = weight.scorer(ctx, collector.scoreMode().needsScores() ? 
PostingsEnum.FREQS : PostingsEnum.NONE); + Scorer scorer = weight.scorer(ctx); if (scorer != null) { final DocIdSetIterator iterator = scorer.iterator(); final LeafCollector leafCollector = collector.getLeafCollector(ctx); diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java index f24a4ff8fe37..bcd9bf1563dc 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java @@ -43,8 +43,8 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight weight = in.createWeight(searcher, scoreMode, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + SpanWeight weight = in.createWeight(searcher, scoreMode, minRequiredPostings, boost); return new AssertingSpanWeight(searcher, weight); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java index 6c94dee59bf9..be04e0092092 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java @@ -68,8 +68,8 @@ public void extractTerms(Set terms) { } @Override - public SpanScorer scorer(LeafReaderContext context, short postings) throws IOException { - return in.scorer(context, postings); + public SpanScorer scorer(LeafReaderContext context) throws IOException { + return in.scorer(context); } @Override diff --git a/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java 
b/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java index 2f0f067c5136..d36f3e21c72a 100644 --- a/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java +++ b/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java @@ -72,8 +72,8 @@ public BrokenExplainTermQuery(Term t, boolean toggleExplainMatch, boolean breakE this.breakExplainScores = breakExplainScores; } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return new BrokenExplainWeight(this, super.createWeight(searcher,scoreMode, boost)); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + return new BrokenExplainWeight(this, super.createWeight(searcher,scoreMode, minRequiredPostings, boost)); } } From f8c9bdb31b5b35fd26811ff6c9662eb24dd2b5fc Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 21 Feb 2018 16:34:41 +0000 Subject: [PATCH 41/83] Add unorderedNearQuery --- .../search/DisjunctionIntervalIterator.java | 8 +- .../lucene/search/IntervalFunction.java | 36 +++++++ .../apache/lucene/search/IntervalQuery.java | 4 + .../org/apache/lucene/search/Intervals.java | 95 +++++++++++++++++++ .../apache/lucene/search/TestIntervals.java | 19 +++- 5 files changed, 154 insertions(+), 8 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java index 57af8a28252b..a7df0b4d59a3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java @@ -64,10 +64,10 @@ public int innerWidth() { public boolean reset(int doc) throws IOException { positionSubIntervals(); queue.clear(); - for (int i = 0; i < subIterators.length; i++) { 
- if (subIterators[i].reset(doc)) { - subIterators[i].nextInterval(); - queue.add(subIterators[i]); + for (IntervalIterator subIterator : subIterators) { + if (subIterator.reset(doc)) { + subIterator.nextInterval(); + queue.add(subIterator); } } current = null; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 0db038febea1..f3adf6c02076 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -67,4 +67,40 @@ public int hashCode() { } } + public static class UnorderedNearFunction extends IntervalFunction { + + final int minWidth; + final int maxWidth; + + public UnorderedNearFunction(int minWidth, int maxWidth) { + this.minWidth = minWidth; + this.maxWidth = maxWidth; + } + + @Override + public IntervalIterator apply(List intervalIterators) { + return Intervals.innerWidthFilter(Intervals.unorderedIntervalIterator(intervalIterators), minWidth, maxWidth); + } + + @Override + public String toString() { + return "ONEAR[" + minWidth + "/" + maxWidth + "]"; + } + + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + UnorderedNearFunction that = (UnorderedNearFunction) o; + return minWidth == that.minWidth && + maxWidth == that.maxWidth; + } + + @Override + public int hashCode() { + return Objects.hash(minWidth, maxWidth); + } + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index 1fe82eac5f75..f25d8004eac9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -40,6 +40,10 @@ public static IntervalQuery orderedNearQuery(String field, int width, Query... 
s return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(0, width)); } + public static IntervalQuery unorderedNearQuery(String field, int width, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(0, width)); + } + protected IntervalQuery(String field, List subQueries, IntervalFunction iteratorFunction) { this.field = field; this.subQueries = subQueries; diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index a001235aec0e..d7bd588728b7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -21,6 +21,7 @@ import java.util.List; import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.util.PriorityQueue; public final class Intervals { @@ -178,4 +179,98 @@ public int nextInterval() throws IOException { } } + public static IntervalIterator unorderedIntervalIterator(List subIntervals) { + for (IntervalIterator it : subIntervals) { + if (it == IntervalIterator.EMPTY) + return IntervalIterator.EMPTY; + } + return new UnorderedIntervalIterator(subIntervals); + } + + private static class UnorderedIntervalIterator implements IntervalIterator { + + private final PriorityQueue queue; + private final IntervalIterator[] subIterators; + + int start, end, innerStart, innerEnd, queueEnd; + + UnorderedIntervalIterator(List subIterators) { + this.queue = new PriorityQueue(subIterators.size()) { + @Override + protected boolean lessThan(IntervalIterator a, IntervalIterator b) { + return a.start() < b.start() || (a.start() == b.start() && a.end() >= b.end()); + } + }; + this.subIterators = new IntervalIterator[subIterators.size()]; + + for (int i = 0; i < subIterators.size(); i++) { + this.subIterators[i] = subIterators.get(i); + } + } + + @Override + public int start() { + return 
start; + } + + @Override + public int end() { + return end; + } + + @Override + public int innerWidth() { + return innerEnd - innerStart + 1; + } + + @Override + public boolean reset(int doc) throws IOException { + this.queue.clear(); + this.queueEnd = start = end = innerEnd = innerStart = -1; + boolean positioned = true; + for (IntervalIterator subIterator : subIterators) { + positioned &= subIterator.reset(doc); + subIterator.nextInterval(); + queue.add(subIterator); + queueEnd = Math.max(queueEnd, subIterator.end()); + } + return positioned; + } + + void updateRightExtreme(IntervalIterator it) { + int itEnd = it.end(); + if (itEnd > queueEnd) { + queueEnd = itEnd; + innerEnd = it.start(); + } + } + + @Override + public int nextInterval() throws IOException { + while (this.queue.size() == subIterators.length && queue.top().start() == start) { + IntervalIterator it = queue.pop(); + if (it != null && it.nextInterval() != Intervals.NO_MORE_INTERVALS) { + queue.add(it); + updateRightExtreme(it); + } + } + if (this.queue.size() < subIterators.length) + return NO_MORE_INTERVALS; + do { + start = queue.top().start(); + innerStart = queue.top().end(); + end = queueEnd; + if (queue.top().end() == end) + return start; + IntervalIterator it = queue.pop(); + if (it != null && it.nextInterval() != Intervals.NO_MORE_INTERVALS) { + queue.add(it); + updateRightExtreme(it); + } + } while (this.queue.size() == subIterators.length && end == queueEnd); + return start; + } + + } + } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 53f589c90117..c7a73761aea9 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -40,14 +40,13 @@ import org.junit.AfterClass; import org.junit.BeforeClass; -@Seed("98A904E565FC8F70:BF2EBE6100A16015") public class TestIntervals extends LuceneTestCase { private 
static String field1_docs[] = { "Nothing of interest to anyone here", "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. Some like it hot, some like it cold, some like it in the pot nine days old", "Pease porridge cold, pease porridge hot, pease porridge in the pot nine days old. Some like it cold, some like it hot, some like it in the pot nine days old", - "Nor here, nowt hot going on in this one", + "Nor here, nowt hot going on in pease this one", "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. Some like it hot, some like it cold", "Porridge is great" }; @@ -100,7 +99,6 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i matchedDocs++; ids.advance(doc); int id = (int) ids.longValue(); - System.out.println(id); if (intervals.reset(doc)) { int i = 0, pos; while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { @@ -143,6 +141,19 @@ public void testOrderedNearIntervals() throws IOException { }); } + public void testUnorderedNearIntervals() throws IOException { + checkIntervals(IntervalQuery.unorderedNearQuery("field1", 100, + new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), + "field1", 3, new int[][]{ + {}, + { 0, 2, 2, 3, 6, 17 }, + { 3, 5, 5, 6, 6, 21 }, + { 3, 7 }, + { 0, 2, 2, 3, 6, 17 }, + {} + }); + } + public void testIntervalDisjunction() throws IOException { checkIntervals(new BooleanQuery.Builder() .add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD) @@ -151,7 +162,7 @@ public void testIntervalDisjunction() throws IOException { {}, { 0, 0, 2, 2, 3, 3, 6, 6, 17, 17}, { 0, 0, 3, 3, 5, 5, 6, 6, 21, 21}, - { 3, 3 }, + { 3, 3, 7, 7 }, { 0, 0, 2, 2, 3, 3, 6, 6, 17, 17}, {} }); From b755f6fd7ea8cc81f9df52efa2671ced003509ec Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 21 Feb 2018 18:08:50 +0000 Subject: [PATCH 42/83] test for more complex queries --- 
.../org/apache/lucene/search/TestIntervals.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index c7a73761aea9..330fbb76dfb8 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -167,4 +167,21 @@ public void testIntervalDisjunction() throws IOException { {} }); } + + public void testNesting() throws IOException { + checkIntervals(IntervalQuery.unorderedNearQuery("field1", 100, + new TermQuery(new Term("field1", "pease")), + new TermQuery(new Term("field1", "porridge")), + new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "hot")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("field1", "cold")), BooleanClause.Occur.SHOULD) + .build()), "field1", 3, new int[][]{ + {}, + { 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 17 }, + { 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 17 }, + {}, + { 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 17 }, + {} + }); + } } From f0ac41ebc7c7cf8756613a7d6fd8484054c357a2 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 22 Feb 2018 10:13:52 +0000 Subject: [PATCH 43/83] Use ScoreMode to pass postings flags, add scoring to IntervalQuery --- .../lucene/document/RangeFieldQuery.java | 2 +- .../SortedNumericDocValuesRangeQuery.java | 2 +- .../SortedSetDocValuesRangeQuery.java | 2 +- .../apache/lucene/search/BooleanQuery.java | 4 +- .../apache/lucene/search/BooleanWeight.java | 4 +- .../org/apache/lucene/search/BoostQuery.java | 4 +- .../lucene/search/ConstantScoreQuery.java | 4 +- .../lucene/search/DisjunctionMaxQuery.java | 8 ++-- .../search/DocValuesFieldExistsQuery.java | 2 +- .../lucene/search/DocValuesRewriteMethod.java | 2 +- .../lucene/search/DoubleValuesSource.java | 4 +- .../lucene/search/IndexOrDocValuesQuery.java | 6 +-- .../apache/lucene/search/IndexSearcher.java | 
8 ++-- .../apache/lucene/search/IntervalQuery.java | 32 ++++++++----- .../lucene/search/MatchAllDocsQuery.java | 2 +- .../lucene/search/MatchNoDocsQuery.java | 2 +- .../lucene/search/MultiPhraseQuery.java | 12 ++--- .../MultiTermQueryConstantScoreWrapper.java | 4 +- .../lucene/search/NormsFieldExistsQuery.java | 2 +- .../org/apache/lucene/search/PhraseQuery.java | 12 ++--- .../apache/lucene/search/PointInSetQuery.java | 2 +- .../apache/lucene/search/PointRangeQuery.java | 2 +- .../java/org/apache/lucene/search/Query.java | 47 +------------------ .../org/apache/lucene/search/ScoreMode.java | 41 ++++++++++++++++ .../apache/lucene/search/SynonymQuery.java | 12 ++--- .../apache/lucene/search/TermInSetQuery.java | 4 +- .../org/apache/lucene/search/TermQuery.java | 10 ++-- .../org/apache/lucene/search/TermScorer.java | 7 ++- .../search/spans/FieldMaskingSpanQuery.java | 4 +- .../lucene/search/spans/SpanBoostQuery.java | 4 +- .../search/spans/SpanContainingQuery.java | 6 +-- .../spans/SpanMultiTermQueryWrapper.java | 2 +- .../lucene/search/spans/SpanNearQuery.java | 6 +-- .../lucene/search/spans/SpanNotQuery.java | 6 +-- .../lucene/search/spans/SpanOrQuery.java | 4 +- .../search/spans/SpanPositionCheckQuery.java | 4 +- .../apache/lucene/search/spans/SpanQuery.java | 2 +- .../lucene/search/spans/SpanTermQuery.java | 2 +- .../lucene/search/spans/SpanWithinQuery.java | 6 +-- .../lucene/search/TestBooleanScorer.java | 2 +- .../lucene/search/TestConstantScoreQuery.java | 4 +- .../apache/lucene/search/TestIntervals.java | 2 +- .../lucene/search/TestLRUQueryCache.java | 10 ++-- .../apache/lucene/search/TestNeedsScores.java | 4 +- .../lucene/search/TestPositionIncrement.java | 4 +- .../TestPositiveScoresOnlyCollector.java | 2 +- .../lucene/search/TestQueryRescorer.java | 2 +- .../TestScoreCachingWrappingScorer.java | 2 +- .../apache/lucene/search/TestScorerPerf.java | 2 +- .../apache/lucene/search/TestSortRandom.java | 2 +- .../TestUsageTrackingFilterCachingPolicy.java | 2 +- 
.../apache/lucene/search/TestWANDScorer.java | 4 +- .../search/spans/JustCompileSearchSpans.java | 2 +- .../spans/TestFieldMaskingSpanQuery.java | 14 +++--- .../search/spans/TestNearSpansOrdered.java | 26 +++++----- .../search/spans/TestSpanCollection.java | 6 +-- .../search/spans/TestSpanContainQuery.java | 2 +- .../apache/lucene/search/spans/TestSpans.java | 10 ++-- .../lucene/search/AssertingIndexSearcher.java | 4 +- .../apache/lucene/search/AssertingQuery.java | 4 +- .../lucene/search/BlockScoreQueryWrapper.java | 4 +- .../search/RandomApproximationQuery.java | 4 +- .../search/spans/AssertingSpanQuery.java | 4 +- .../search/TestBaseExplanationTestCase.java | 4 +- 64 files changed, 208 insertions(+), 207 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java index 9b32a1506734..a24b7cdfae58 100644 --- a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java @@ -262,7 +262,7 @@ private void checkFieldInfo(FieldInfo fieldInfo) { } @Override - public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) { diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java index 03a7fa897339..246b50f3dab6 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java @@ -93,7 +93,7 @@ public Query rewrite(IndexReader 
reader) throws IOException { abstract SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException; @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java index 69f542aa2e48..de7c11b1cc9a 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java @@ -104,7 +104,7 @@ public Query rewrite(IndexReader reader) throws IOException { abstract SortedSetDocValues getValues(LeafReader reader, String field) throws IOException; @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index f974dc04559b..f52df9fb9cd8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -199,12 +199,12 @@ private BooleanQuery rewriteNoScoring() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, 
float boost) throws IOException { BooleanQuery query = this; if (scoreMode.needsScores() == false) { query = rewriteNoScoring(); } - return new BooleanWeight(query, searcher, scoreMode, minRequiredPostings, boost); + return new BooleanWeight(query, searcher, scoreMode, boost); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java index 8de9394e2142..829d72a5ff19 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java @@ -45,14 +45,14 @@ final class BooleanWeight extends Weight { final ArrayList weights; final ScoreMode scoreMode; - BooleanWeight(BooleanQuery query, IndexSearcher searcher, ScoreMode scoreMode, Query.Postings minRequiredPostings, float boost) throws IOException { + BooleanWeight(BooleanQuery query, IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { super(query); this.query = query; this.scoreMode = scoreMode; this.similarity = searcher.getSimilarity(); weights = new ArrayList<>(); for (BooleanClause c : query) { - Weight w = searcher.createWeight(c.getQuery(), c.isScoring() ? scoreMode : ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, boost); + Weight w = searcher.createWeight(c.getQuery(), c.isScoring() ? 
scoreMode : ScoreMode.COMPLETE_NO_SCORES, boost); weights.add(w); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java b/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java index 860368240f77..4e4649cb7100 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BoostQuery.java @@ -116,8 +116,8 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return query.createWeight(searcher, scoreMode, minRequiredPostings, BoostQuery.this.boost * boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return query.createWeight(searcher, scoreMode, BoostQuery.this.boost * boost); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java index 269328720540..464cde6a45f9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -110,8 +110,8 @@ public long cost() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - final Weight innerWeight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, 1f); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + final Weight innerWeight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 1f); if (scoreMode.needsScores()) { return new ConstantScoreWeight(this, boost) { diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index 
f79d2b9cfcda..1e67cb150465 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -103,10 +103,10 @@ protected class DisjunctionMaxWeight extends Weight { private final ScoreMode scoreMode; /** Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */ - public DisjunctionMaxWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public DisjunctionMaxWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { super(DisjunctionMaxQuery.this); for (Query disjunctQuery : disjuncts) { - weights.add(searcher.createWeight(disjunctQuery, scoreMode, minRequiredPostings, boost)); + weights.add(searcher.createWeight(disjunctQuery, scoreMode, boost)); } this.scoreMode = scoreMode; } @@ -189,8 +189,8 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio /** Create the Weight used to score us */ @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return new DisjunctionMaxWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new DisjunctionMaxWeight(searcher, scoreMode, boost); } /** Optimize our representation and our subqueries representations diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java index 23fbaecbd981..009f11cf116f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java @@ -62,7 +62,7 @@ public String toString(String field) { } @Override - public Weight 
createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java index 6f4408599e5c..5d591983fab0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java @@ -72,7 +72,7 @@ public final int hashCode() { public final String getField() { return query.getField(); } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java index 48579ccbee43..3c52172019dd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java @@ -82,7 +82,7 @@ public Explanation explain(LeafReaderContext ctx, int docId, Explanation scoreEx * IndexReader-independent implementations can just return {@code this} * * Queries that use DoubleValuesSource objects should call rewrite() during - * {@link Query#createWeight(IndexSearcher, ScoreMode, org.apache.lucene.search.Query.Postings, float)} rather than during + * {@link Query#createWeight(IndexSearcher, ScoreMode, float)} rather 
than during * {@link Query#rewrite(IndexReader)} to avoid IndexReader reference leakage */ public abstract DoubleValuesSource rewrite(IndexSearcher reader) throws IOException; @@ -554,7 +554,7 @@ public boolean needsScores() { @Override public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException { - return new WeightDoubleValuesSource(searcher.rewrite(query).createWeight(searcher, ScoreMode.COMPLETE, Query.Postings.NONE, 1f)); + return new WeightDoubleValuesSource(searcher.rewrite(query).createWeight(searcher, ScoreMode.COMPLETE, 1f)); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java index 3eb238254141..f89924d16054 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java @@ -110,9 +110,9 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - final Weight indexWeight = indexQuery.createWeight(searcher, scoreMode, minRequiredPostings, boost); - final Weight dvWeight = dvQuery.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + final Weight indexWeight = indexQuery.createWeight(searcher, scoreMode, boost); + final Weight dvWeight = dvQuery.createWeight(searcher, scoreMode, boost); return new Weight(this) { @Override public void extractTerms(Set terms) { diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index ded001453857..c23d3da347d8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -686,7 +686,7 @@ protected Explanation explain(Weight weight, int doc) throws IOException { */ public Weight createNormalizedWeight(Query query, ScoreMode scoreMode) throws IOException { query = rewrite(query); - return createWeight(query, scoreMode, Query.Postings.NONE, 1f); + return createWeight(query, scoreMode, 1f); } /** @@ -694,10 +694,10 @@ public Weight createNormalizedWeight(Query query, ScoreMode scoreMode) throws IO * if possible and configured. * @lucene.experimental */ - public Weight createWeight(Query query, ScoreMode scoreMode, Query.Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException { final QueryCache queryCache = this.queryCache; - Weight weight = query.createWeight(this, scoreMode, minRequiredPostings, boost); - if (scoreMode.needsScores() == false && queryCache != null) { + Weight weight = query.createWeight(this, scoreMode, boost); + if (scoreMode.useQueryCache() && queryCache != null) { weight = queryCache.doCache(weight, queryCachingPolicy); } return weight; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index f25d8004eac9..f409de96817f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashSet; import java.util.List; import java.util.Objects; import java.util.Set; @@ -28,6 +29,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.similarities.Similarity; public final class IntervalQuery extends Query { 
@@ -61,22 +63,29 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { List subWeights = new ArrayList<>(); for (Query q : subQueries) { - subWeights.add(searcher.createWeight(q, ScoreMode.COMPLETE, minRequiredPostings.atLeast(Postings.POSITIONS), boost)); + subWeights.add(searcher.createWeight(q, ScoreMode.COMPLETE_POSITIONS, boost)); } - return new IntervalWeight(this, subWeights, buildSimScorer(searcher, subWeights), scoreMode); + return new IntervalWeight(this, subWeights, scoreMode.needsScores() ? buildSimScorer(searcher, subWeights, boost) : null, scoreMode); } - private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, List subWeights) { - // nocommit - return new Similarity.SimScorer(field) { - @Override - public float score(float freq, long norm) { - return 1; + private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, List subWeights, float boost) throws IOException { + Set terms = new HashSet<>(); + for (Weight w : subWeights) { + w.extractTerms(terms); // nocommit can we do this without building TermStates twice? 
+ } + TermStatistics[] termStats = new TermStatistics[terms.size()]; + int termUpTo = 0; + for (Term term : terms) { + TermStatistics termStatistics = searcher.termStatistics(term, TermStates.build(searcher.readerContext, term, true)); + if (termStatistics != null) { + termStats[termUpTo++] = termStatistics; } - }; + } + CollectionStatistics collectionStats = searcher.collectionStatistics(field); + return searcher.getSimilarity().scorer(boost, collectionStats, termStats); } @Override @@ -138,7 +147,8 @@ public Scorer scorer(LeafReaderContext context) throws IOException { subIntervals.add(it); } IntervalIterator intervals = IntervalQuery.this.iteratorFunction.apply(subIntervals); - LeafSimScorer leafScorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.POSITIVE_INFINITY); // nocommit + LeafSimScorer leafScorer = simScorer == null ? null + : new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.POSITIVE_INFINITY); return new IntervalScorer(this, field, ConjunctionDISI.intersectIterators(disis), intervals, leafScorer); } diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java index f11861820bd3..89b299734144 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java @@ -29,7 +29,7 @@ public final class MatchAllDocsQuery extends Query { @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { return new ConstantScoreWeight(this, boost) { @Override public String toString() { diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java index 74e8bdeaedda..525a18395434 100644 
--- a/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java @@ -42,7 +42,7 @@ public MatchNoDocsQuery(String reason) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new Weight(this) { @Override public void extractTerms(Set terms) { diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index f6154bc30685..2b6bde8a1daa 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -193,13 +193,13 @@ private class MultiPhraseWeight extends Weight { private final Similarity.SimScorer stats; private final Map termStates = new HashMap<>(); private final ScoreMode scoreMode; - private final Postings minRequiredPostings; + private final int postingsFlags; - public MultiPhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) + public MultiPhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { super(MultiPhraseQuery.this); this.scoreMode = scoreMode; - this.minRequiredPostings = minRequiredPostings; + this.postingsFlags = Math.max(scoreMode.minRequiredPostings(), PostingsEnum.POSITIONS); this.similarity = searcher.getSimilarity(); final IndexReaderContext context = searcher.getTopReaderContext(); @@ -267,7 +267,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { TermState termState = termStates.get(term).get(context); if (termState != null) { termsEnum.seekExact(term.bytes(), termState); - postings.add(termsEnum.postings(null, 
minRequiredPostings.atLeast(Postings.POSITIONS).getRequiredPostings())); + postings.add(termsEnum.postings(null, this.postingsFlags)); totalMatchCost += PhraseQuery.termPositionsCost(termsEnum); } } @@ -345,8 +345,8 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return new MultiPhraseWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new MultiPhraseWeight(searcher, scoreMode, boost); } /** Prints a user-readable version of this query. */ diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java index c2128927068d..3a46b96411cf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java @@ -108,7 +108,7 @@ public final int hashCode() { public final String getField() { return query.getField(); } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { /** Try to collect terms from the given terms enum and return true iff all @@ -153,7 +153,7 @@ private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException { bq.add(new TermQuery(new Term(query.field, t.term), termStates), Occur.SHOULD); } Query q = new ConstantScoreQuery(bq.build()); - final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, minRequiredPostings, score()); + final Weight weight = 
searcher.rewrite(q).createWeight(searcher, scoreMode, score()); return new WeightOrDocIdSet(weight); } diff --git a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java index 3382dda13902..74218b40b0c3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java @@ -62,7 +62,7 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index af94a03a25e3..360b0175061e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -353,10 +353,10 @@ private class PhraseWeight extends Weight { private final Similarity similarity; private final Similarity.SimScorer stats; private final ScoreMode scoreMode; - private final Postings minRequiredPostings; + private final int postingsFlags; private transient TermStates states[]; - public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) + public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { super(PhraseQuery.this); final int[] positions = PhraseQuery.this.getPositions(); @@ -366,7 +366,7 @@ public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minReq throw new IllegalStateException("PhraseWeight requires that the first position is 0, 
call rewrite first"); } this.scoreMode = scoreMode; - this.minRequiredPostings = minRequiredPostings; + this.postingsFlags = Math.max(scoreMode.minRequiredPostings(), PostingsEnum.POSITIONS); this.similarity = searcher.getSimilarity(); final IndexReaderContext context = searcher.getTopReaderContext(); states = new TermStates[terms.length]; @@ -424,7 +424,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { return null; } te.seekExact(t.bytes(), state); - PostingsEnum postingsEnum = te.postings(null, minRequiredPostings.atLeast(Postings.POSITIONS).getRequiredPostings()); + PostingsEnum postingsEnum = te.postings(null, postingsFlags); postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t); totalMatchCost += termPositionsCost(te); } @@ -512,8 +512,8 @@ static float termPositionsCost(TermsEnum termsEnum) throws IOException { @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return new PhraseWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new PhraseWeight(searcher, scoreMode, boost); } /** Prints a user-readable version of this query. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java index 25095400c336..689d64a50d74 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java @@ -106,7 +106,7 @@ protected PointInSetQuery(String field, int numDims, int bytesPerDim, Stream pac } @Override - public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs". // This is an inverted structure and should be used in the first pass: diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 683f737bde57..7e48383b4720 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -99,7 +99,7 @@ public static void checkArgs(String field, Object lowerPoint, Object upperPoint) } @Override - public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs". 
// This is an inverted structure and should be used in the first pass: diff --git a/lucene/core/src/java/org/apache/lucene/search/Query.java b/lucene/core/src/java/org/apache/lucene/search/Query.java index aec1d9dc9d48..22631b393490 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Query.java +++ b/lucene/core/src/java/org/apache/lucene/search/Query.java @@ -45,51 +45,6 @@ */ public abstract class Query { - /** - * Enumeration defining what postings information should be retrieved from the - * index for a given Spans - */ - public enum Postings { - NONE { - @Override - public int getRequiredPostings() { - return PostingsEnum.NONE; - } - }, - FREQS { - @Override - public int getRequiredPostings() { - return PostingsEnum.FREQS; - } - }, - POSITIONS { - @Override - public int getRequiredPostings() { - return PostingsEnum.POSITIONS; - } - }, - PAYLOADS { - @Override - public int getRequiredPostings() { - return PostingsEnum.PAYLOADS; - } - }, - OFFSETS { - @Override - public int getRequiredPostings() { - return PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS; - } - }; - - public abstract int getRequiredPostings(); - - public Postings atLeast(Postings postings) { - if (postings.compareTo(this) > 0) - return postings; - return this; - } - } - /** Prints a query to a string, with field assumed to be the * default field and omitted. */ @@ -109,7 +64,7 @@ public final String toString() { * @param scoreMode How the produced scorers will be consumed. * @param boost The boost that is propagated by the parent queries. 
*/ - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { throw new UnsupportedOperationException("Query " + this + " does not implement createWeight"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java index 31a5d108fc37..2c014efc31d8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java @@ -16,6 +16,8 @@ */ package org.apache.lucene.search; +import org.apache.lucene.index.PostingsEnum; + /** * Different modes of search. */ @@ -29,6 +31,11 @@ public enum ScoreMode { public boolean needsScores() { return true; } + + @Override + public int minRequiredPostings() { + return PostingsEnum.FREQS; + } }, /** @@ -40,6 +47,28 @@ public boolean needsScores() { public boolean needsScores() { return false; } + + @Override + public int minRequiredPostings() { + return PostingsEnum.NONE; + } + }, + + COMPLETE_POSITIONS { + @Override + public boolean needsScores() { + return false; + } + + @Override + public boolean useQueryCache() { + return false; + } + + @Override + public int minRequiredPostings() { + return PostingsEnum.POSITIONS; + } }, /** @@ -51,10 +80,22 @@ public boolean needsScores() { public boolean needsScores() { return true; } + + @Override + public int minRequiredPostings() { + return PostingsEnum.FREQS; + } }; /** * Whether this {@link ScoreMode} needs to compute scores. 
*/ public abstract boolean needsScores(); + + public abstract int minRequiredPostings(); + + public boolean useQueryCache() { + return !needsScores(); + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java index c9f44f09c681..00ab66610914 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java @@ -112,16 +112,16 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { if (scoreMode.needsScores()) { - return new SynonymWeight(this, searcher, minRequiredPostings, boost); + return new SynonymWeight(this, searcher, boost); } else { // if scores are not needed, let BooleanWeight deal with optimizing that case. 
BooleanQuery.Builder bq = new BooleanQuery.Builder(); for (Term term : terms) { bq.add(new TermQuery(term), BooleanClause.Occur.SHOULD); } - return searcher.rewrite(bq.build()).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, boost); + return searcher.rewrite(bq.build()).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); } } @@ -129,9 +129,8 @@ class SynonymWeight extends Weight { private final TermStates termStates[]; private final Similarity similarity; private final Similarity.SimScorer simWeight; - private final Postings minRequiredPostings; - SynonymWeight(Query query, IndexSearcher searcher, Postings minRequiredPostings, float boost) throws IOException { + SynonymWeight(Query query, IndexSearcher searcher, float boost) throws IOException { super(query); CollectionStatistics collectionStats = searcher.collectionStatistics(terms[0].field()); long docFreq = 0; @@ -152,7 +151,6 @@ class SynonymWeight extends Weight { } else { this.simWeight = null; // no terms exist at all, we won't use similarity } - this.minRequiredPostings = minRequiredPostings; } @Override @@ -211,7 +209,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { long termMaxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); totalMaxFreq += termMaxFreq; LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, termMaxFreq); - subScorers.add(new TermScorer(this, terms[i].field(), termsEnum, ScoreMode.COMPLETE, minRequiredPostings, simScorer)); + subScorers.add(new TermScorer(this, terms[i].field(), termsEnum, ScoreMode.COMPLETE, simScorer)); } } if (subScorers.isEmpty()) { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java index e08cada8d184..a8bf5b0679c1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java @@ -209,7 +209,7 @@ private static class WeightOrDocIdSet { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override @@ -273,7 +273,7 @@ private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException { bq.add(new TermQuery(new Term(t.field, t.term), termStates), Occur.SHOULD); } Query q = new ConstantScoreQuery(bq.build()); - final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, minRequiredPostings, score()); + final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score()); return new WeightOrDocIdSet(weight); } else { assert builder != null; diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index e2be41a7131f..6ee9c0a61cbf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -47,16 +47,14 @@ final class TermWeight extends Weight { private final Similarity.SimScorer simScorer; private final TermStates termStates; private final ScoreMode scoreMode; - private final Postings minRequiredPostings; - public TermWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, + public TermWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost, TermStates termStates) throws IOException { super(TermQuery.this); if (scoreMode.needsScores() && termStates == null) { throw new IllegalStateException("termStates are required when scores are needed"); } this.scoreMode = scoreMode; - this.minRequiredPostings = minRequiredPostings; this.termStates = termStates; this.similarity = searcher.getSimilarity(); 
@@ -101,7 +99,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { .getIndexOptions(); float maxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), maxFreq); - return new TermScorer(this, getTerm().field(), termsEnum, scoreMode, minRequiredPostings, scorer); + return new TermScorer(this, getTerm().field(), termsEnum, scoreMode, scorer); } private long getMaxFreq(IndexOptions indexOptions, long ttf, long df) { @@ -188,7 +186,7 @@ public Term getTerm() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { final IndexReaderContext context = searcher.getTopReaderContext(); final TermStates termState; if (perReaderTermState == null @@ -199,7 +197,7 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings termState = this.perReaderTermState; } - return new TermWeight(searcher, scoreMode, minRequiredPostings, boost, termState); + return new TermWeight(searcher, scoreMode, boost, termState); } /** Prints a user-readable version of this query. */ diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 89efa028475a..9d2c7192fc4d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -45,12 +45,12 @@ final class TermScorer extends Scorer { * @param docScorer * A {@link LeafSimScorer} for the appropriate field. 
*/ - TermScorer(Weight weight, String field, TermsEnum te, ScoreMode scoreMode, Query.Postings minRequiredPostings, LeafSimScorer docScorer) throws IOException { + TermScorer(Weight weight, String field, TermsEnum te, ScoreMode scoreMode, LeafSimScorer docScorer) throws IOException { super(weight); this.docScorer = docScorer; this.field = field; if (scoreMode == ScoreMode.TOP_SCORES) { - impactsEnum = te.impacts(docScorer.getSimScorer(), minRequiredPostings.atLeast(Query.Postings.FREQS).getRequiredPostings()); + impactsEnum = te.impacts(docScorer.getSimScorer(), scoreMode.minRequiredPostings()); postingsEnum = impactsEnum; iterator = new DocIdSetIterator() { @@ -107,8 +107,7 @@ public long cost() { } }; } else { - int pf = minRequiredPostings.atLeast(scoreMode.needsScores() ? Query.Postings.FREQS : Query.Postings.NONE).getRequiredPostings(); - postingsEnum = te.postings(null, pf); + postingsEnum = te.postings(null, scoreMode.minRequiredPostings()); impactsEnum = new SlowImpactsEnum(postingsEnum, docScorer.getSimScorer().score(Float.MAX_VALUE, 1)); iterator = postingsEnum; } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java index 1abea327ec17..4a4c4fbae993 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java @@ -90,8 +90,8 @@ public SpanQuery getMaskedQuery() { // ...this is done to be more consistent with things like SpanFirstQuery @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return maskedQuery.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return maskedQuery.createWeight(searcher, 
scoreMode, boost); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java index 2b600ffe8c41..9556959a3ed2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanBoostQuery.java @@ -109,8 +109,8 @@ public String getField() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return query.createWeight(searcher, scoreMode, minRequiredPostings, SpanBoostQuery.this.boost * boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return query.createWeight(searcher, scoreMode, SpanBoostQuery.this.boost * boost); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java index b408b39dcb93..63662994bf14 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java @@ -44,9 +44,9 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - SpanWeight bigWeight = big.createWeight(searcher, scoreMode, minRequiredPostings, boost); - SpanWeight littleWeight = little.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + SpanWeight bigWeight = big.createWeight(searcher, scoreMode, boost); + SpanWeight littleWeight = little.createWeight(searcher, scoreMode, boost); return new SpanContainingWeight(searcher, scoreMode.needsScores() ? 
getTermStates(bigWeight, littleWeight) : null, bigWeight, littleWeight, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java index fd79ad60c16a..088e73092de9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java @@ -96,7 +96,7 @@ public String getField() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { throw new IllegalArgumentException("Rewrite first!"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index 199f951aadb8..17b9e5151304 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -178,10 +178,10 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { List subWeights = new ArrayList<>(); for (SpanQuery q : clauses) { - subWeights.add(q.createWeight(searcher, scoreMode, minRequiredPostings, boost)); + subWeights.add(q.createWeight(searcher, scoreMode, boost)); } return new SpanNearWeight(subWeights, searcher, scoreMode.needsScores() ? 
getTermStates(subWeights) : null, boost); } @@ -307,7 +307,7 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new SpanGapWeight(searcher, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index e8c74f33763a..6c56df3abee6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -98,9 +98,9 @@ public String toString(String field) { @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - SpanWeight includeWeight = include.createWeight(searcher, scoreMode, minRequiredPostings, boost); - SpanWeight excludeWeight = exclude.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, minRequiredPostings, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + SpanWeight includeWeight = include.createWeight(searcher, scoreMode, boost); + SpanWeight excludeWeight = exclude.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); return new SpanNotWeight(searcher, scoreMode.needsScores() ? 
getTermStates(includeWeight) : null, includeWeight, excludeWeight, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index fb0f0aac7dce..849edaa30e6e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -116,10 +116,10 @@ public int hashCode() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { List subWeights = new ArrayList<>(clauses.size()); for (SpanQuery q : clauses) { - subWeights.add(q.createWeight(searcher, scoreMode, minRequiredPostings, boost)); + subWeights.add(q.createWeight(searcher, scoreMode, boost)); } return new SpanOrWeight(searcher, scoreMode.needsScores() ? 
getTermStates(subWeights) : null, subWeights, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java index 75aecc0a1fe0..099b627e1ee3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java @@ -68,8 +68,8 @@ public SpanPositionCheckQuery(SpanQuery match) { protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException; @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - SpanWeight matchWeight = match.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + SpanWeight matchWeight = match.createWeight(searcher, scoreMode, boost); return new SpanPositionCheckWeight(matchWeight, searcher, scoreMode.needsScores() ? 
getTermStates(matchWeight) : null, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java index b50010fd8b85..ca657b6cff1f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java @@ -37,7 +37,7 @@ public abstract class SpanQuery extends Query { public abstract String getField(); @Override - public abstract SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException; + public abstract SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException; /** * Build a map of terms to {@link TermStates}, for use in constructing SpanWeights diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index 5d8ad6400fac..9ac7afb81ee3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -65,7 +65,7 @@ public SpanTermQuery(Term term, TermStates termStates) { public String getField() { return term.field(); } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { final TermStates context; final IndexReaderContext topContext = searcher.getTopReaderContext(); if (termStates == null || termStates.wasBuiltFor(topContext) == false) { diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java index 7f29612cc710..fba85fe6e86a 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java @@ -45,9 +45,9 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - SpanWeight bigWeight = big.createWeight(searcher, scoreMode, minRequiredPostings, boost); - SpanWeight littleWeight = little.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + SpanWeight bigWeight = big.createWeight(searcher, scoreMode, boost); + SpanWeight littleWeight = little.createWeight(searcher, scoreMode, boost); return new SpanWithinWeight(searcher, scoreMode.needsScores() ? getTermStates(bigWeight, littleWeight) : null, bigWeight, littleWeight, boost); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java index dab8e7923328..8a8379be3432 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java @@ -77,7 +77,7 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new Weight(CrazyMustUseBulkScorerQuery.this) { @Override public void extractTerms(Set terms) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java index 86c92f7cb937..f3382a5bf6c7 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java @@ -135,8 +135,8 @@ public String toString(String field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return in.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return in.createWeight(searcher, scoreMode, boost); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 330fbb76dfb8..80094814cf49 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -85,7 +85,7 @@ public static void teardownIndex() throws IOException { } private void checkIntervals(Query query, String field, int expectedMatchCount, int[][] expected) throws IOException { - Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE, Query.Postings.POSITIONS, 1f); + Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE_POSITIONS, 1f); int matchedDocs = 0; for (LeafReaderContext ctx : searcher.leafContexts) { Scorer scorer = weight.scorer(ctx); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java index 89f75c24b7fa..9de9b73f4d8e 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java @@ -347,7 +347,7 @@ private static class DummyQuery extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public 
Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { @@ -940,7 +940,7 @@ private static class BadQuery extends Query { int[] i = new int[] {42}; // an array so that clone keeps the reference @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { @@ -1274,7 +1274,7 @@ public void testReaderNotSuitedForCaching() throws IOException { private static class NoCacheQuery extends Query { @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new Weight(this) { @Override public void extractTerms(Set terms) { @@ -1351,7 +1351,7 @@ private static class DummyQuery2 extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { @@ -1450,7 +1450,7 @@ public int hashCode() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new 
ConstantScoreWeight(this, 1) { @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java b/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java index 0cb4462a49dc..9352f72f97b7 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java @@ -100,8 +100,8 @@ static class AssertNeedsScores extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - final Weight w = in.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + final Weight w = in.createWeight(searcher, scoreMode, boost); return new FilterWeight(w) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java index 64db26e3d351..9348862387d6 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java @@ -252,7 +252,7 @@ public void testPayloadsPos0() throws Exception { System.out.println("\ngetPayloadSpans test"); } PayloadSpanCollector collector = new PayloadSpanCollector(); - Spans pspans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.PAYLOADS, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS); + Spans pspans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS); while (pspans.nextDoc() != Spans.NO_MORE_DOCS) { while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { if (VERBOSE) { @@ -274,7 +274,7 @@ public void testPayloadsPos0() 
throws Exception { assertEquals(8, count); // System.out.println("\ngetSpans test"); - Spans spans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = snq.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); count = 0; sawZero = false; while (spans.nextDoc() != Spans.NO_MORE_DOCS) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java index 9b1460bb7d0e..81855bb4dc84 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java @@ -102,7 +102,7 @@ public void testNegativeScores() throws Exception { IndexReader ir = writer.getReader(); writer.close(); IndexSearcher searcher = newSearcher(ir); - Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, Query.Postings.NONE, 1f); + Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, 1f); Scorer s = new SimpleScorer(fake); TopDocsCollector tdc = TopScoreDocCollector.create(scores.length); Collector c = new PositiveScoresOnlyCollector(tdc); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java index 9ba43ae6b6ec..eb46ab49e466 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java @@ -418,7 +418,7 @@ public FixedScoreQuery(int[] idToNum, boolean reverse) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws 
IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new Weight(FixedScoreQuery.this) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java index 53ecd49b6984..900267166894 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java @@ -122,7 +122,7 @@ public void testGetScores() throws Exception { IndexReader ir = writer.getReader(); writer.close(); IndexSearcher searcher = newSearcher(ir); - Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, Query.Postings.FREQS, 1f); + Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher, ScoreMode.COMPLETE, 1f); Scorer s = new SimpleScorer(fake); ScoreCachingCollector scc = new ScoreCachingCollector(scores.length); scc.setScorer(s); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java b/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java index 22b42f8493b2..59a246cb6647 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java @@ -149,7 +149,7 @@ private static class BitSetQuery extends Query { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java 
b/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java index 8afaa2db8958..05b016c31c35 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java @@ -229,7 +229,7 @@ public RandomQuery(long seed, float density, List docValues) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java index 8f2bcf5bdceb..670df770e67c 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java @@ -118,7 +118,7 @@ public int hashCode() { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(DummyQuery.this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java index 0bffdc7ffb4d..5367dbcd3f05 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java @@ -336,8 +336,8 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override 
- public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return new FilterWeight(query.createWeight(searcher, scoreMode, minRequiredPostings, boost)) { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new FilterWeight(query.createWeight(searcher, scoreMode, boost)) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { Scorer scorer = super.scorer(context); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java index 92e99abb2ad7..3244c1d5ef81 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java @@ -94,7 +94,7 @@ public String getField() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java index 74c9fee00668..f72ea664b937 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java @@ -143,7 +143,7 @@ public void testRewrite0() throws Exception { QueryUtils.checkEqual(q, qr); Set terms = new HashSet<>(); - qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).extractTerms(terms); + qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 
1f).extractTerms(terms); assertEquals(1, terms.size()); } @@ -163,7 +163,7 @@ public Query rewrite(IndexReader reader) { QueryUtils.checkUnequal(q, qr); Set terms = new HashSet<>(); - qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).extractTerms(terms); + qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).extractTerms(terms); assertEquals(2, terms.size()); } @@ -177,7 +177,7 @@ public void testRewrite2() throws Exception { QueryUtils.checkEqual(q, qr); HashSet set = new HashSet<>(); - qr.createWeight(searcher, ScoreMode.COMPLETE, Query.Postings.NONE, 1f).extractTerms(set); + qr.createWeight(searcher, ScoreMode.COMPLETE, 1f).extractTerms(set); assertEquals(2, set.size()); } @@ -253,7 +253,7 @@ public void testSpans0() throws Exception { SpanQuery q = new SpanOrQuery(q1, new FieldMaskingSpanQuery(q2, "gender")); check(q, new int[] { 0, 1, 2, 3, 4 }); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(span, 0,0,1); assertNext(span, 1,0,1); assertNext(span, 1,1,2); @@ -275,8 +275,8 @@ public void testSpans1() throws Exception { check(qA, new int[] { 0, 1, 2, 4 }); check(qB, new int[] { 0, 1, 2, 4 }); - Spans spanA = qA.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); - Spans spanB = qB.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spanA = qA.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spanB = 
qB.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); while (spanA.nextDoc() != Spans.NO_MORE_DOCS) { assertNotSame("spanB not still going", Spans.NO_MORE_DOCS, spanB.nextDoc()); @@ -301,7 +301,7 @@ public void testSpans2() throws Exception { new FieldMaskingSpanQuery(qB, "id") }, -1, false ); check(q, new int[] { 0, 1, 2, 3 }); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.NONE, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(span, 0,0,1); assertNext(span, 1,1,2); assertNext(span, 2,0,1); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java index d38db81b19bd..7cb18cd1ef73 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java @@ -123,7 +123,7 @@ public String s(int doc, int start, int end) { public void testNearSpansNext() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(span,0,0,3); assertNext(span,1,0,4); assertFinished(span); @@ -136,7 +136,7 @@ public void testNearSpansNext() throws Exception { */ public void testNearSpansAdvanceLikeNext() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, 
ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(0, span.advance(0)); assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -148,7 +148,7 @@ public void testNearSpansAdvanceLikeNext() throws Exception { public void testNearSpansNextThenAdvance() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc()); assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -160,7 +160,7 @@ public void testNearSpansNextThenAdvance() throws Exception { public void testNearSpansNextThenAdvancePast() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNotSame(Spans.NO_MORE_DOCS, span.nextDoc()); assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -169,13 +169,13 @@ public void testNearSpansNextThenAdvancePast() throws Exception { public void testNearSpansAdvancePast() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 
1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(Spans.NO_MORE_DOCS, span.advance(2)); } public void testNearSpansAdvanceTo0() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(0, span.advance(0)); assertEquals(0, span.nextStartPosition()); assertEquals(s(0,0,3), s(span)); @@ -183,7 +183,7 @@ public void testNearSpansAdvanceTo0() throws Exception { public void testNearSpansAdvanceTo1() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans span = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(1, span.advance(1)); assertEquals(0, span.nextStartPosition()); assertEquals(s(1,0,4), s(span)); @@ -222,7 +222,7 @@ public void testOrderedSpanIteration() throws Exception { new SpanOrQuery(new SpanTermQuery(new Term(FIELD, "w1")), new SpanTermQuery(new Term(FIELD, "w2"))), new SpanTermQuery(new Term(FIELD, "w4")) }, 10, true); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 
1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans,0,0,4); assertNext(spans,0,1,4); assertFinished(spans); @@ -232,7 +232,7 @@ public void testOrderedSpanIterationSameTerms1() throws Exception { SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ new SpanTermQuery(new Term(FIELD, "t1")), new SpanTermQuery(new Term(FIELD, "t2")) }, 1, true); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans,4,0,2); assertFinished(spans); } @@ -241,7 +241,7 @@ public void testOrderedSpanIterationSameTerms2() throws Exception { SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ new SpanTermQuery(new Term(FIELD, "t2")), new SpanTermQuery(new Term(FIELD, "t1")) }, 1, true); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans,4,1,4); assertNext(spans,4,2,4); assertFinished(spans); @@ -265,7 +265,7 @@ public void testGaps() throws Exception { .addGap(1) .addClause(new SpanTermQuery(new Term(FIELD, "w2"))) .build(); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 1, 0, 3); assertNext(spans, 2, 0, 3); 
assertFinished(spans); @@ -278,7 +278,7 @@ public void testGaps() throws Exception { .addClause(new SpanTermQuery(new Term(FIELD, "w3"))) .setSlop(1) .build(); - spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 2, 0, 5); assertNext(spans, 3, 0, 6); assertFinished(spans); @@ -290,7 +290,7 @@ public void testMultipleGaps() throws Exception { .addGap(2) .addClause(new SpanTermQuery(new Term(FIELD, "g"))) .build(); - Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 5, 0, 4); assertNext(spans, 5, 9, 13); assertFinished(spans); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java index 6c2d28c5c0b1..fa0bf1952d42 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java @@ -121,7 +121,7 @@ public void testNestedNearQuery() throws IOException { SpanNearQuery q7 = new SpanNearQuery(new SpanQuery[]{q1, q6}, 1, true); TermCollector collector = new TermCollector(); - Spans spans = q7.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = q7.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 
1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.advance(0)); spans.nextStartPosition(); checkCollectedTerms(spans, collector, new Term(FIELD, "w1"), new Term(FIELD, "w2"), new Term(FIELD, "w3")); @@ -141,7 +141,7 @@ public void testOrQuery() throws IOException { SpanOrQuery orQuery = new SpanOrQuery(q2, q3); TermCollector collector = new TermCollector(); - Spans spans = orQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = orQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(1, spans.advance(1)); spans.nextStartPosition(); @@ -171,7 +171,7 @@ public void testSpanNotQuery() throws IOException { SpanNotQuery notq = new SpanNotQuery(nq, q3); TermCollector collector = new TermCollector(); - Spans spans = notq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = notq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals(2, spans.advance(2)); spans.nextStartPosition(); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java index f7b408877bd5..b4cad767706d 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java @@ -73,7 +73,7 @@ void checkHits(Query query, int[] results) throws Exception { } Spans makeSpans(SpanQuery sq) throws Exception { - return sq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 
Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + return sq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); } void tstEqualSpans(String mes, SpanQuery expectedQ, SpanQuery actualQ) throws Exception { diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java index d8b9f9216a8a..151c8ee16108 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java @@ -194,7 +194,7 @@ public void testSpanNearOrderedEqual15() throws Exception { public void testSpanNearOrderedOverlap() throws Exception { final SpanQuery query = spanNearOrderedQuery(field, 1, "t1", "t2", "t3"); - Spans spans = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertEquals("first doc", 11, spans.nextDoc()); assertEquals("first start", 0, spans.nextStartPosition()); @@ -209,7 +209,7 @@ public void testSpanNearOrderedOverlap() throws Exception { public void testSpanNearUnOrdered() throws Exception { //See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 for discussion about this test SpanQuery senq = spanNearUnorderedQuery(field, 0, "u1", "u2"); - Spans spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), 
SpanWeight.Postings.POSITIONS); assertNext(spans, 4, 1, 3); assertNext(spans, 5, 2, 4); assertNext(spans, 8, 2, 4); @@ -218,7 +218,7 @@ public void testSpanNearUnOrdered() throws Exception { assertFinished(spans); senq = spanNearUnorderedQuery(1, senq, spanTermQuery(field, "u2")); - spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertNext(spans, 4, 0, 3); assertNext(spans, 4, 1, 3); // unordered spans can be subsets assertNext(spans, 5, 0, 4); @@ -232,7 +232,7 @@ public void testSpanNearUnOrdered() throws Exception { } private Spans orSpans(String[] terms) throws Exception { - return spanOrQuery(field, terms).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + return spanOrQuery(field, terms).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); } public void testSpanOrEmpty() throws Exception { @@ -414,7 +414,7 @@ private int spanCount(String include, int slop, String exclude, int pre, int pos SpanQuery iq = includeTerms.length == 1 ? 
spanTermQuery(field, include) : spanNearOrderedQuery(field, slop, includeTerms); SpanQuery eq = spanTermQuery(field, exclude); SpanQuery snq = spanNotQuery(iq, eq, pre, post); - Spans spans = snq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, Query.Postings.POSITIONS, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + Spans spans = snq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); int i = 0; if (spans != null) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java index 4d98f89017a2..2a6376df1f17 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java @@ -52,9 +52,9 @@ public AssertingIndexSearcher(Random random, IndexReaderContext context, Execut } @Override - public Weight createWeight(Query query, ScoreMode scoreMode, Query.Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException { // this adds assertions to the inner weights/scorers too - return new AssertingWeight(random, super.createWeight(query, scoreMode, minRequiredPostings, boost), scoreMode); + return new AssertingWeight(random, super.createWeight(query, scoreMode, boost), scoreMode); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java index e136eaa1023e..b3d2f8116c44 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java @@ -39,9 +39,9 @@ public static Query wrap(Random 
random, Query query) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { assert boost >= 0; - return new AssertingWeight(new Random(random.nextLong()), in.createWeight(searcher, scoreMode, minRequiredPostings, boost), scoreMode); + return new AssertingWeight(new Random(random.nextLong()), in.createWeight(searcher, scoreMode, boost), scoreMode); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java index 98e56a255875..4b982bb45a8d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java @@ -73,8 +73,8 @@ public Query rewrite(IndexReader reader) throws IOException { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - final Weight inWeight = query.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + final Weight inWeight = query.createWeight(searcher, scoreMode, boost); if (scoreMode.needsScores() == false) { return inWeight; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java index 8c408b17276c..fcb48a8d0e84 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java @@ -62,8 +62,8 @@ public String toString(String 
field) { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - final Weight weight = query.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + final Weight weight = query.createWeight(searcher, scoreMode, boost); return new RandomApproximationWeight(weight, new Random(random.nextLong())); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java index bcd9bf1563dc..f24a4ff8fe37 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java @@ -43,8 +43,8 @@ public String toString(String field) { } @Override - public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - SpanWeight weight = in.createWeight(searcher, scoreMode, minRequiredPostings, boost); + public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + SpanWeight weight = in.createWeight(searcher, scoreMode, boost); return new AssertingSpanWeight(searcher, weight); } diff --git a/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java b/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java index d36f3e21c72a..2f0f067c5136 100644 --- a/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java +++ b/lucene/test-framework/src/test/org/apache/lucene/search/TestBaseExplanationTestCase.java @@ -72,8 +72,8 @@ public BrokenExplainTermQuery(Term t, boolean toggleExplainMatch, boolean breakE this.breakExplainScores = 
breakExplainScores; } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, Postings minRequiredPostings, float boost) throws IOException { - return new BrokenExplainWeight(this, super.createWeight(searcher,scoreMode, minRequiredPostings, boost)); + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new BrokenExplainWeight(this, super.createWeight(searcher,scoreMode, boost)); } } From bf075736fceef96229628a054a8da90dd7ba7fd1 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 22 Feb 2018 10:22:43 +0000 Subject: [PATCH 44/83] cleanup --- .../org/apache/lucene/index/PostingsEnum.java | 4 ---- .../org/apache/lucene/search/BooleanWeight.java | 1 - .../lucene/search/ConstantScoreWeight.java | 1 - .../org/apache/lucene/search/DisiWrapper.java | 16 ---------------- .../apache/lucene/search/DoubleValuesSource.java | 1 - .../org/apache/lucene/search/FilterWeight.java | 2 +- .../src/java/org/apache/lucene/search/Query.java | 2 -- .../org/apache/lucene/search/QueryRescorer.java | 1 - .../java/org/apache/lucene/search/TermQuery.java | 1 - .../search/spans/TestNearSpansOrdered.java | 1 - .../lucene/search/spans/TestSpanCollection.java | 1 - .../org/apache/lucene/search/QueryUtils.java | 1 - .../lucene/search/ScorerIndexSearcher.java | 1 - 13 files changed, 1 insertion(+), 32 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java index bb93268ff92b..fdd32a9f2fe0 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java @@ -63,10 +63,6 @@ public static boolean featureRequested(int flags, short feature) { return (flags & feature) == feature; } - public static short highest(short a, short b) { - return (short) Math.max(a, b); - } - private AttributeSource atts = null; /** Sole constructor. 
(For invocation by subclass diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java index 829d72a5ff19..fffdd09093f1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java @@ -27,7 +27,6 @@ import java.util.Set; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.similarities.Similarity; diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java index cdf4be94f3c8..671ec7103782 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreWeight.java @@ -21,7 +21,6 @@ import java.util.Set; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; /** diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index b8891b7c0ce2..0a581804aaa3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -82,21 +82,5 @@ public DisiWrapper(Spans spans) { this.lastApproxMatchDoc = -2; } - public DisiWrapper(DocIdSetIterator disi) { - this.scorer = null; - this.spans = null; - this.iterator = disi; - this.cost = iterator.cost(); - this.doc = -1; - this.twoPhaseView = TwoPhaseIterator.unwrap(disi); - if (twoPhaseView != null) { - approximation = twoPhaseView.approximation(); - matchCost = twoPhaseView.matchCost(); - } - else { - approximation = iterator; - matchCost = 0f; - } - } } diff --git 
a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java index 3c52172019dd..3938d3f9ba22 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java @@ -26,7 +26,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.PostingsEnum; /** * Base class for producing {@link DoubleValues} diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java index 278ad987a225..925c9534f898 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java @@ -47,7 +47,7 @@ protected FilterWeight(Weight weight) { /** * Alternative constructor. * Use this variant only if the weight was not obtained - * via the {@link Query#createWeight(IndexSearcher, ScoreMode, Query.Postings, float)} + * via the {@link Query#createWeight(IndexSearcher, ScoreMode, float)} * method of the query object. */ protected FilterWeight(Query query, Weight weight) { diff --git a/lucene/core/src/java/org/apache/lucene/search/Query.java b/lucene/core/src/java/org/apache/lucene/search/Query.java index 22631b393490..54de63fc02fd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Query.java +++ b/lucene/core/src/java/org/apache/lucene/search/Query.java @@ -20,8 +20,6 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.search.spans.SpanWeight; /** The abstract base class for queries.

    Instantiable subclasses are: diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java index e98099691b09..6b19f295a7d8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java @@ -23,7 +23,6 @@ import java.util.List; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; /** A {@link Rescorer} that uses a provided Query to assign * scores to the first-pass hits. diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index 6ee9c0a61cbf..79dd976b7789 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -25,7 +25,6 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermStates; diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java index 7cb18cd1ef73..072d3818490a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java @@ -28,7 +28,6 @@ import org.apache.lucene.search.CheckHits; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TopDocs; diff --git 
a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java index fa0bf1952d42..ff9327526d22 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanCollection.java @@ -31,7 +31,6 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java index 71592a8a3701..fa113113f81a 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java @@ -32,7 +32,6 @@ import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PointValues; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java b/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java index 97c5c7a1338e..ae699130190d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/ScorerIndexSearcher.java @@ -22,7 +22,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.util.Bits; /** From 
485bffbcbd0de0577d27b7b1350017ccab2a96c3 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 22 Feb 2018 11:52:56 +0000 Subject: [PATCH 45/83] Test scoring + fix compared with phrase query --- .../org/apache/lucene/search/IntervalIterator.java | 2 +- .../org/apache/lucene/search/IntervalQuery.java | 2 +- .../org/apache/lucene/search/TestIntervalQuery.java | 13 +++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index a65aa1d87c5c..b203395b38a9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -34,7 +34,7 @@ public interface IntervalIterator { int nextInterval() throws IOException; default float score() { - return (float) (1.0 / (1.0 + (end() - start()))); + return (float) (1.0 / (end() - start())); } IntervalIterator EMPTY = new IntervalIterator() { diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index f409de96817f..6de2e19c5f51 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -148,7 +148,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { } IntervalIterator intervals = IntervalQuery.this.iteratorFunction.apply(subIntervals); LeafSimScorer leafScorer = simScorer == null ? 
null - : new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.POSITIVE_INFINITY); + : new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE); return new IntervalScorer(this, field, ConjunctionDISI.intersectIterators(disis), intervals, leafScorer); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 77029ffffc4b..09259cd182f1 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -71,6 +71,19 @@ private void checkHits(Query query, int[] results) throws IOException { CheckHits.checkHits(random(), query, field, searcher, results); } + public void testScoring() throws IOException { + PhraseQuery pq = new PhraseQuery.Builder().add(new Term(field, "w2")).add(new Term(field, "w3")).build(); + Query equiv = IntervalQuery.orderedNearQuery(field, 0, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))); + + TopDocs td1 = searcher.search(pq, 10); + TopDocs td2 = searcher.search(equiv, 10); + assertEquals(td1.totalHits, td2.totalHits); + for (int i = 0; i < td1.scoreDocs.length; i++) { + assertEquals(td1.scoreDocs[i].doc, td2.scoreDocs[i].doc); + assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 0f); + } + } + public void testOrderedNearQueryWidth0() throws IOException { checkHits(IntervalQuery.orderedNearQuery(field, 0, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), From c7d3e3b386d732a9cbc8b1dae55d44c1dcc98369 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 23 Feb 2018 14:06:52 +0000 Subject: [PATCH 46/83] Add some difference intervals --- .../search/IntervalDifferenceFunction.java | 133 ++++++++++++++ .../search/IntervalDifferenceQuery.java | 166 ++++++++++++++++++ .../lucene/search/IntervalFunction.java | 46 ++++- 
.../lucene/search/IntervalIterator.java | 2 +- .../apache/lucene/search/IntervalQuery.java | 55 ++++-- .../apache/lucene/search/IntervalScorer.java | 40 ++++- .../org/apache/lucene/search/Intervals.java | 61 ++++++- .../lucene/search/TestIntervalQuery.java | 31 +++- .../apache/lucene/search/TestIntervals.java | 11 +- 9 files changed, 506 insertions(+), 39 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceQuery.java diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java new file mode 100644 index 000000000000..c1094bd3ffca --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +public abstract class IntervalDifferenceFunction { + + @Override + public abstract int hashCode(); + + @Override + public abstract boolean equals(Object obj); + + @Override + public abstract String toString(); + + public abstract IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend); + + public static final IntervalDifferenceFunction NOT_CONTAINING = new SingletonFunction("NOT_CONTAINING") { + @Override + public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { + return Intervals.difference(minuend, subtrahend); + } + }; + + public static class NotWithinFunction extends IntervalDifferenceFunction { + + private final int positions; + + public NotWithinFunction(int positions) { + this.positions = positions; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NotWithinFunction that = (NotWithinFunction) o; + return positions == that.positions; + } + + @Override + public String toString() { + return "NOTWITHIN/" + positions; + } + + @Override + public int hashCode() { + return Objects.hash(positions); + } + + @Override + public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { + IntervalIterator notWithin = new IntervalIterator() { + @Override + public int start() { + int start = subtrahend.start(); + return Math.max(0, start - positions); + } + + @Override + public int end() { + int end = subtrahend.end(); + int newEnd = end + positions; + if (newEnd < 0) // check for overflow + return Integer.MAX_VALUE; + return newEnd; + } + + @Override + public int innerWidth() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean reset(int doc) throws IOException { + return subtrahend.reset(doc); + } + + @Override + public int nextInterval() throws 
IOException { + return subtrahend.nextInterval(); + } + }; + return NOT_CONTAINING.apply(minuend, notWithin); + } + } + + private static abstract class SingletonFunction extends IntervalDifferenceFunction { + + private final String name; + + protected SingletonFunction(String name) { + this.name = name; + } + + @Override + public int hashCode() { + return System.identityHashCode(this); + } + + @Override + public boolean equals(Object obj) { + return obj == this; + } + + @Override + public String toString() { + return name; + } + + } + + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceQuery.java new file mode 100644 index 000000000000..99d5274e51d5 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceQuery.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Collections; +import java.util.Objects; +import java.util.Set; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.similarities.Similarity; + +public class IntervalDifferenceQuery extends Query { + + public static IntervalDifferenceQuery notContaining(String field, Query minuend, Query subtrahend) { + return new IntervalDifferenceQuery(field, minuend, subtrahend, IntervalDifferenceFunction.NOT_CONTAINING); + } + + public static IntervalDifferenceQuery notWithin(String field, Query minuend, int positions, Query subtrahend) { + return new IntervalDifferenceQuery(field, minuend, subtrahend, new IntervalDifferenceFunction.NotWithinFunction(positions)); + } + + private final Query minuend; + private final Query subtrahend; + private final IntervalDifferenceFunction function; + private final String field; + + protected IntervalDifferenceQuery(String field, Query minuend, Query subtrahend, IntervalDifferenceFunction function) { + this.minuend = minuend; + this.subtrahend = subtrahend; + this.function = function; + this.field = field; + } + + @Override + public String toString(String field) { + return function + "(" + minuend + ", " + subtrahend + ")"; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + Weight minuendWeight = searcher.createWeight(minuend, ScoreMode.COMPLETE_POSITIONS, 1); + Weight subtrahendWeight = searcher.createWeight(subtrahend, ScoreMode.COMPLETE_POSITIONS, 1); + return new IntervalDifferenceWeight(minuendWeight, subtrahendWeight, scoreMode, + searcher.getSimilarity(), IntervalQuery.buildSimScorer(field, searcher, Collections.singletonList(minuendWeight), boost)); + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + Query rewrittenMinuend = 
minuend.rewrite(reader); + Query rewrittenSubtrahend = subtrahend.rewrite(reader); + if (rewrittenMinuend != minuend || rewrittenSubtrahend != subtrahend) { + return new IntervalDifferenceQuery(field, rewrittenMinuend, rewrittenSubtrahend, function); + } + return this; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + IntervalDifferenceQuery that = (IntervalDifferenceQuery) o; + return Objects.equals(minuend, that.minuend) && + Objects.equals(subtrahend, that.subtrahend) && + Objects.equals(function, that.function); + } + + @Override + public int hashCode() { + return Objects.hash(minuend, subtrahend, function); + } + + private class IntervalDifferenceWeight extends Weight { + + final Weight minuendWeight; + final Weight subtrahendWeight; + final ScoreMode scoreMode; + final Similarity similarity; + final Similarity.SimScorer simScorer; + + private IntervalDifferenceWeight(Weight minuendWeight, Weight subtrahendWeight, ScoreMode scoreMode, + Similarity similarity, Similarity.SimScorer simScorer) { + super(IntervalDifferenceQuery.this); + this.minuendWeight = minuendWeight; + this.subtrahendWeight = subtrahendWeight; + this.scoreMode = scoreMode; + this.similarity = similarity; + this.simScorer = simScorer; + } + + @Override + public void extractTerms(Set terms) { + this.minuendWeight.extractTerms(terms); + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + IntervalScorer scorer = (IntervalScorer) scorer(context); + if (scorer != null) { + int newDoc = scorer.iterator().advance(doc); + if (newDoc == doc) { + return scorer.explain("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "]"); + } + } + return Explanation.noMatch("no matching intervals"); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + Scorer minuendScorer = minuendWeight.scorer(context); 
+ Scorer subtrahendScorer = subtrahendWeight.scorer(context); + if (subtrahendScorer == null || minuendScorer == null) + return minuendScorer; + + IntervalIterator minuendIt = minuendScorer.intervals(field); + IntervalIterator subtrahendIt = subtrahendScorer.intervals(field); + if (subtrahendIt == IntervalIterator.EMPTY || subtrahendIt == null) + return minuendScorer; + + LeafSimScorer leafScorer = simScorer == null ? null + : new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE); + + return new IntervalScorer(this, field, minuendScorer.iterator(), function.apply(minuendIt, subtrahendIt), leafScorer){ + @Override + public TwoPhaseIterator twoPhaseIterator() { + return new TwoPhaseIterator(approximation) { + @Override + public boolean matches() throws IOException { + if (subtrahendScorer.docID() < approximation.docID()) { + subtrahendScorer.iterator().advance(approximation.docID()); + } + return intervals.reset(approximation.docID()) && intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; + } + + @Override + public float matchCost() { + return 0; + } + }; + } + }; + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return minuendWeight.isCacheable(ctx) && subtrahendWeight.isCacheable(ctx); + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index f3adf6c02076..87cca4ba8b1a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -17,11 +17,12 @@ package org.apache.lucene.search; +import java.io.IOException; +import java.util.Arrays; import java.util.List; import java.util.Objects; -import java.util.function.Function; -public abstract class IntervalFunction implements Function, IntervalIterator> { +public abstract class IntervalFunction { @Override public abstract int hashCode(); @@ -32,6 +33,15 @@ 
public abstract class IntervalFunction implements Function iterators); + + public static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { + @Override + public IntervalIterator apply(List intervalIterators) { + return Intervals.orderedIntervalIterator(intervalIterators); + } + }; + public static class OrderedNearFunction extends IntervalFunction { public OrderedNearFunction(int minWidth, int maxWidth) { @@ -67,6 +77,13 @@ public int hashCode() { } } + public static final IntervalFunction UNORDERED = new SingletonFunction("UNORDERED") { + @Override + public IntervalIterator apply(List intervalIterators) { + return Intervals.unorderedIntervalIterator(intervalIterators); + } + }; + public static class UnorderedNearFunction extends IntervalFunction { final int minWidth; @@ -103,4 +120,29 @@ public int hashCode() { } } + private static abstract class SingletonFunction extends IntervalFunction { + + private final String name; + + protected SingletonFunction(String name) { + this.name = name; + } + + @Override + public int hashCode() { + return System.identityHashCode(this); + } + + @Override + public boolean equals(Object obj) { + return obj == this; + } + + @Override + public String toString() { + return name; + } + + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index b203395b38a9..b14211e0eea1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -34,7 +34,7 @@ public interface IntervalIterator { int nextInterval() throws IOException; default float score() { - return (float) (1.0 / (end() - start())); + return (float) (1.0 / (1 + innerWidth())); } IntervalIterator EMPTY = new IntervalIterator() { diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java 
index 6de2e19c5f51..6903626257b2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -27,26 +27,45 @@ import java.util.stream.Collectors; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermStates; import org.apache.lucene.search.similarities.Similarity; public final class IntervalQuery extends Query { - private final String field; - private final List subQueries; - private final IntervalFunction iteratorFunction; - - public static IntervalQuery orderedNearQuery(String field, int width, Query... subQueries) { + public static IntervalQuery ordered(String field, int width, Query... subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(0, width)); } - public static IntervalQuery unorderedNearQuery(String field, int width, Query... subQueries) { + public static IntervalQuery ordered(String field, int minWidth, int maxWidth, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(minWidth, maxWidth)); + } + + public static IntervalQuery ordered(String field, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.ORDERED); + } + + public static IntervalQuery unordered(String field, int width, Query... subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(0, width)); } + public static IntervalQuery unordered(String field, int minWidth, int maxWidth, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(minWidth, maxWidth)); + } + + public static IntervalQuery unordered(String field, Query... 
subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.UNORDERED); + } + + private final String field; + private final List subQueries; + private final IntervalFunction iteratorFunction; + protected IntervalQuery(String field, List subQueries, IntervalFunction iteratorFunction) { + this(field, subQueries, null, iteratorFunction); + } + + protected IntervalQuery(String field, List subQueries, Query subtrahend, IntervalFunction iteratorFunction) { this.field = field; this.subQueries = subQueries; this.iteratorFunction = iteratorFunction; @@ -68,13 +87,14 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo for (Query q : subQueries) { subWeights.add(searcher.createWeight(q, ScoreMode.COMPLETE_POSITIONS, boost)); } - return new IntervalWeight(this, subWeights, scoreMode.needsScores() ? buildSimScorer(searcher, subWeights, boost) : null, scoreMode); + return new IntervalWeight(this, subWeights, scoreMode.needsScores() ? buildSimScorer(field, searcher, subWeights, boost) : null, + searcher.getSimilarity(), scoreMode); } - private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, List subWeights, float boost) throws IOException { + static Similarity.SimScorer buildSimScorer(String field, IndexSearcher searcher, List subWeights, float boost) throws IOException { Set terms = new HashSet<>(); for (Weight w : subWeights) { - w.extractTerms(terms); // nocommit can we do this without building TermStates twice? 
+ w.extractTerms(terms); } TermStatistics[] termStats = new TermStatistics[terms.size()]; int termUpTo = 0; @@ -107,12 +127,14 @@ private class IntervalWeight extends Weight { final List subWeights; final Similarity.SimScorer simScorer; + final Similarity similarity; final ScoreMode scoreMode; - public IntervalWeight(Query query, List subWeights, Similarity.SimScorer simScorer, ScoreMode scoreMode) { + public IntervalWeight(Query query, List subWeights, Similarity.SimScorer simScorer, Similarity similarity, ScoreMode scoreMode) { super(query); this.subWeights = subWeights; this.simScorer = simScorer; + this.similarity = similarity; this.scoreMode = scoreMode; } @@ -125,11 +147,14 @@ public void extractTerms(Set terms) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context); - if (scorer != null && scorer.iterator().advance(doc) == doc) { - return Explanation.match(scorer.score(), "Intervals match"); // nocommit improve this + IntervalScorer scorer = (IntervalScorer) scorer(context); + if (scorer != null) { + int newDoc = scorer.iterator().advance(doc); + if (newDoc == doc) { + return scorer.explain("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "]"); + } } - return Explanation.noMatch("No matching intervals"); + return Explanation.noMatch("no matching intervals"); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index e9cdc1aa4402..fb9476350618 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -21,12 +21,16 @@ class IntervalScorer extends Scorer { - private final IntervalIterator intervals; + protected final IntervalIterator intervals; private final String field; - private final DocIdSetIterator approximation; + protected final DocIdSetIterator 
approximation; private final LeafSimScorer simScorer; - protected IntervalScorer(Weight weight, String field, DocIdSetIterator approximation, IntervalIterator intervals, LeafSimScorer simScorer) { + private float freq = -1; + private int lastScoredDoc = -1; + + protected IntervalScorer(Weight weight, String field, DocIdSetIterator approximation, + IntervalIterator intervals, LeafSimScorer simScorer) { super(weight); this.intervals = intervals; this.approximation = approximation; @@ -41,13 +45,35 @@ public int docID() { @Override public float score() throws IOException { - float freq = 0; - do { - freq += intervals.score(); - } while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS); + ensureFreq(); return simScorer.score(docID(), freq); } + public Explanation explain(String topLevel) throws IOException { + ensureFreq(); + Explanation freqExplanation = Explanation.match(freq, "intervalFreq=" + freq); + Explanation scoreExplanation = simScorer.explain(docID(), freqExplanation); + return Explanation.match(scoreExplanation.getValue(), + topLevel + ", result of:", + scoreExplanation); + } + + public float freq() throws IOException { + ensureFreq(); + return freq; + } + + private void ensureFreq() throws IOException { + if (lastScoredDoc != docID()) { + lastScoredDoc = docID(); + freq = 0; + do { + freq += intervals.score(); + } + while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS); + } + } + @Override public IntervalIterator intervals(String field) { if (this.field.equals(field)) diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index d7bd588728b7..e9c32ee6c174 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -232,7 +232,10 @@ public boolean reset(int doc) throws IOException { positioned &= subIterator.reset(doc); subIterator.nextInterval(); queue.add(subIterator); - 
queueEnd = Math.max(queueEnd, subIterator.end()); + if (subIterator.end() > queueEnd) { + queueEnd = subIterator.end(); + innerEnd = subIterator.start(); + } } return positioned; } @@ -273,4 +276,60 @@ public int nextInterval() throws IOException { } + public static IntervalIterator difference(IntervalIterator minuend, IntervalIterator subtrahend) { + return new DifferenceIterator(minuend, subtrahend); + } + + private static class DifferenceIterator implements IntervalIterator { + + final IntervalIterator minuend; + final IntervalIterator subtrahend; + boolean subPositioned; + + private DifferenceIterator(IntervalIterator minuend, IntervalIterator subtrahend) { + this.minuend = minuend; + this.subtrahend = subtrahend; + } + + @Override + public int start() { + return minuend.start(); + } + + @Override + public int end() { + return minuend.end(); + } + + @Override + public int innerWidth() { + return minuend.innerWidth(); + } + + @Override + public boolean reset(int doc) throws IOException { + subPositioned = subtrahend.reset(doc); + if (subPositioned) + subPositioned = subtrahend.nextInterval() != NO_MORE_INTERVALS; + return minuend.reset(doc); + } + + @Override + public int nextInterval() throws IOException { + if (subPositioned == false) + return minuend.nextInterval(); + while (minuend.nextInterval() != NO_MORE_INTERVALS) { + while (subtrahend.end() < minuend.start()) { + if (subtrahend.nextInterval() == NO_MORE_INTERVALS) { + subPositioned = false; + return minuend.start(); + } + } + if (subtrahend.start() > minuend.end()) + return minuend.start(); + } + return NO_MORE_INTERVALS; + } + } + } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 09259cd182f1..5b943cd67237 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -73,7 +73,7 @@ private void 
checkHits(Query query, int[] results) throws IOException { public void testScoring() throws IOException { PhraseQuery pq = new PhraseQuery.Builder().add(new Term(field, "w2")).add(new Term(field, "w3")).build(); - Query equiv = IntervalQuery.orderedNearQuery(field, 0, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))); + Query equiv = IntervalQuery.ordered(field, 0, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))); TopDocs td1 = searcher.search(pq, 10); TopDocs td2 = searcher.search(equiv, 10); @@ -85,28 +85,28 @@ public void testScoring() throws IOException { } public void testOrderedNearQueryWidth0() throws IOException { - checkHits(IntervalQuery.orderedNearQuery(field, 0, new TermQuery(new Term(field, "w1")), + checkHits(IntervalQuery.ordered(field, 0, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new int[]{0}); } public void testOrderedNearQueryWidth1() throws IOException { - checkHits(IntervalQuery.orderedNearQuery(field, 1, new TermQuery(new Term(field, "w1")), + checkHits(IntervalQuery.ordered(field, 1, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new int[]{0, 1, 2, 5}); } public void testOrderedNearQueryWidth2() throws IOException { - checkHits(IntervalQuery.orderedNearQuery(field, 2, new TermQuery(new Term(field, "w1")), + checkHits(IntervalQuery.ordered(field, 2, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new int[]{0, 1, 2, 3, 5}); } public void testNestedOrderedNearQuery() throws IOException { // onear/1(w1, onear/2(w2, w3)) - Query q = IntervalQuery.orderedNearQuery(field, 1, + Query q = IntervalQuery.ordered(field, 1, new TermQuery(new Term(field, "w1")), - IntervalQuery.orderedNearQuery(field, 2, + IntervalQuery.ordered(field, 2, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))) ); @@ -114,4 +114,23 @@ public void testNestedOrderedNearQuery() throws IOException { checkHits(q, new 
int[]{0, 1, 2}); } + public void testUnorderedQuery() throws IOException { + Query q = IntervalQuery.unordered(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))); + checkHits(q, new int[]{0, 1, 2, 3, 5}); + } + + public void testNotContainingQuery() throws IOException { + Query q = IntervalDifferenceQuery.notContaining(field, + IntervalQuery.unordered(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), + new TermQuery(new Term(field, "w3"))); + + checkHits(q, new int[]{0, 2, 4, 5}); + } + + public void testNotWithinQuery() throws IOException { + Query q = IntervalDifferenceQuery.notWithin(field, new TermQuery(new Term(field, "w1")), 1, + new TermQuery(new Term(field, "w2"))); + checkHits(q, new int[]{ 1, 2, 3 }); + } + } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 80094814cf49..1c2be22c840a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -18,9 +18,7 @@ package org.apache.lucene.search; import java.io.IOException; -import java.util.Arrays; -import com.carrotsearch.randomizedtesting.annotations.Seed; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; @@ -31,7 +29,6 @@ import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; @@ -129,7 +126,7 @@ public void testTermQueryIntervals() throws IOException { } public void testOrderedNearIntervals() throws IOException { - checkIntervals(IntervalQuery.orderedNearQuery("field1", 100, + 
checkIntervals(IntervalQuery.ordered("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), "field1", 3, new int[][]{ {}, @@ -142,9 +139,9 @@ public void testOrderedNearIntervals() throws IOException { } public void testUnorderedNearIntervals() throws IOException { - checkIntervals(IntervalQuery.unorderedNearQuery("field1", 100, + checkIntervals(IntervalQuery.unordered("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), - "field1", 3, new int[][]{ + "field1", 4, new int[][]{ {}, { 0, 2, 2, 3, 6, 17 }, { 3, 5, 5, 6, 6, 21 }, @@ -169,7 +166,7 @@ public void testIntervalDisjunction() throws IOException { } public void testNesting() throws IOException { - checkIntervals(IntervalQuery.unorderedNearQuery("field1", 100, + checkIntervals(IntervalQuery.unordered("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "porridge")), new BooleanQuery.Builder() From 12ca0fd0ff98c41991ed9af6ba0d4a0c3244ac87 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 23 Feb 2018 15:45:10 +0000 Subject: [PATCH 47/83] difference -> non_overlapping --- ...Query.java => ContainingIntervalQuery.java} | 18 +++++++++--------- .../search/IntervalDifferenceFunction.java | 7 +++---- .../org/apache/lucene/search/Intervals.java | 8 ++++---- .../lucene/search/TestIntervalQuery.java | 4 ++-- 4 files changed, 18 insertions(+), 19 deletions(-) rename lucene/core/src/java/org/apache/lucene/search/{IntervalDifferenceQuery.java => ContainingIntervalQuery.java} (90%) diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceQuery.java b/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java similarity index 90% rename from lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceQuery.java rename to lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java index 99d5274e51d5..93a3ff851228 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java @@ -27,14 +27,14 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.Similarity; -public class IntervalDifferenceQuery extends Query { +public class ContainingIntervalQuery extends Query { - public static IntervalDifferenceQuery notContaining(String field, Query minuend, Query subtrahend) { - return new IntervalDifferenceQuery(field, minuend, subtrahend, IntervalDifferenceFunction.NOT_CONTAINING); + public static ContainingIntervalQuery nonOverlapping(String field, Query minuend, Query subtrahend) { + return new ContainingIntervalQuery(field, minuend, subtrahend, IntervalDifferenceFunction.NON_OVERLAPPING); } - public static IntervalDifferenceQuery notWithin(String field, Query minuend, int positions, Query subtrahend) { - return new IntervalDifferenceQuery(field, minuend, subtrahend, new IntervalDifferenceFunction.NotWithinFunction(positions)); + public static ContainingIntervalQuery notWithin(String field, Query minuend, int positions, Query subtrahend) { + return new ContainingIntervalQuery(field, minuend, subtrahend, new IntervalDifferenceFunction.NotWithinFunction(positions)); } private final Query minuend; @@ -42,7 +42,7 @@ public static IntervalDifferenceQuery notWithin(String field, Query minuend, int private final IntervalDifferenceFunction function; private final String field; - protected IntervalDifferenceQuery(String field, Query minuend, Query subtrahend, IntervalDifferenceFunction function) { + protected ContainingIntervalQuery(String field, Query minuend, Query subtrahend, IntervalDifferenceFunction function) { this.minuend = minuend; this.subtrahend = subtrahend; this.function = function; @@ -67,7 +67,7 @@ public Query rewrite(IndexReader reader) throws IOException { Query rewrittenMinuend = minuend.rewrite(reader); Query rewrittenSubtrahend = 
subtrahend.rewrite(reader); if (rewrittenMinuend != minuend || rewrittenSubtrahend != subtrahend) { - return new IntervalDifferenceQuery(field, rewrittenMinuend, rewrittenSubtrahend, function); + return new ContainingIntervalQuery(field, rewrittenMinuend, rewrittenSubtrahend, function); } return this; } @@ -76,7 +76,7 @@ public Query rewrite(IndexReader reader) throws IOException { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - IntervalDifferenceQuery that = (IntervalDifferenceQuery) o; + ContainingIntervalQuery that = (ContainingIntervalQuery) o; return Objects.equals(minuend, that.minuend) && Objects.equals(subtrahend, that.subtrahend) && Objects.equals(function, that.function); @@ -97,7 +97,7 @@ private class IntervalDifferenceWeight extends Weight { private IntervalDifferenceWeight(Weight minuendWeight, Weight subtrahendWeight, ScoreMode scoreMode, Similarity similarity, Similarity.SimScorer simScorer) { - super(IntervalDifferenceQuery.this); + super(ContainingIntervalQuery.this); this.minuendWeight = minuendWeight; this.subtrahendWeight = subtrahendWeight; this.scoreMode = scoreMode; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java index c1094bd3ffca..656479fea911 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java @@ -18,7 +18,6 @@ package org.apache.lucene.search; import java.io.IOException; -import java.util.List; import java.util.Objects; public abstract class IntervalDifferenceFunction { @@ -34,10 +33,10 @@ public abstract class IntervalDifferenceFunction { public abstract IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend); - public static final IntervalDifferenceFunction NOT_CONTAINING = new 
SingletonFunction("NOT_CONTAINING") { + public static final IntervalDifferenceFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - return Intervals.difference(minuend, subtrahend); + return Intervals.nonOverlapping(minuend, subtrahend); } }; @@ -100,7 +99,7 @@ public int nextInterval() throws IOException { return subtrahend.nextInterval(); } }; - return NOT_CONTAINING.apply(minuend, notWithin); + return NON_OVERLAPPING.apply(minuend, notWithin); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index e9c32ee6c174..e8e16744fe07 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -276,17 +276,17 @@ public int nextInterval() throws IOException { } - public static IntervalIterator difference(IntervalIterator minuend, IntervalIterator subtrahend) { - return new DifferenceIterator(minuend, subtrahend); + public static IntervalIterator nonOverlapping(IntervalIterator minuend, IntervalIterator subtrahend) { + return new NonOverlappingIterator(minuend, subtrahend); } - private static class DifferenceIterator implements IntervalIterator { + private static class NonOverlappingIterator implements IntervalIterator { final IntervalIterator minuend; final IntervalIterator subtrahend; boolean subPositioned; - private DifferenceIterator(IntervalIterator minuend, IntervalIterator subtrahend) { + private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtrahend) { this.minuend = minuend; this.subtrahend = subtrahend; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 5b943cd67237..110b03df3864 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -120,7 +120,7 @@ public void testUnorderedQuery() throws IOException { } public void testNotContainingQuery() throws IOException { - Query q = IntervalDifferenceQuery.notContaining(field, + Query q = ContainingIntervalQuery.nonOverlapping(field, IntervalQuery.unordered(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new TermQuery(new Term(field, "w3"))); @@ -128,7 +128,7 @@ public void testNotContainingQuery() throws IOException { } public void testNotWithinQuery() throws IOException { - Query q = IntervalDifferenceQuery.notWithin(field, new TermQuery(new Term(field, "w1")), 1, + Query q = ContainingIntervalQuery.notWithin(field, new TermQuery(new Term(field, "w1")), 1, new TermQuery(new Term(field, "w2"))); checkHits(q, new int[]{ 1, 2, 3 }); } From 5c3d731fb13c42ffc583f4bd9a4be6d8f2b7d373 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 23 Feb 2018 15:58:07 +0000 Subject: [PATCH 48/83] Rearrange things a bit --- .../search/ContainingIntervalQuery.java | 8 - .../search/IntervalDifferenceFunction.java | 60 +++- .../apache/lucene/search/IntervalFilter.java | 20 ++ .../lucene/search/IntervalFunction.java | 186 ++++++++++- .../apache/lucene/search/IntervalQuery.java | 24 -- .../org/apache/lucene/search/Intervals.java | 305 +----------------- .../org/apache/lucene/search/TermScorer.java | 56 +++- .../lucene/search/TestIntervalQuery.java | 20 +- .../apache/lucene/search/TestIntervals.java | 6 +- 9 files changed, 346 insertions(+), 339 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java index 93a3ff851228..8d564942e864 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java @@ -29,14 +29,6 @@ public class ContainingIntervalQuery extends Query { - public static ContainingIntervalQuery nonOverlapping(String field, Query minuend, Query subtrahend) { - return new ContainingIntervalQuery(field, minuend, subtrahend, IntervalDifferenceFunction.NON_OVERLAPPING); - } - - public static ContainingIntervalQuery notWithin(String field, Query minuend, int positions, Query subtrahend) { - return new ContainingIntervalQuery(field, minuend, subtrahend, new IntervalDifferenceFunction.NotWithinFunction(positions)); - } - private final Query minuend; private final Query subtrahend; private final IntervalDifferenceFunction function; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java index 656479fea911..6b5d58c9fe8d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.util.Objects; +import static org.apache.lucene.search.Intervals.NO_MORE_INTERVALS; + public abstract class IntervalDifferenceFunction { @Override @@ -36,10 +38,66 @@ public abstract class IntervalDifferenceFunction { public static final IntervalDifferenceFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - return Intervals.nonOverlapping(minuend, subtrahend); + return nonOverlapping(minuend, subtrahend); } }; + public static IntervalIterator nonOverlapping(IntervalIterator minuend, IntervalIterator subtrahend) { + return new NonOverlappingIterator(minuend, subtrahend); + } + + private static class NonOverlappingIterator implements IntervalIterator { + + final IntervalIterator minuend; + final IntervalIterator 
subtrahend; + boolean subPositioned; + + private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtrahend) { + this.minuend = minuend; + this.subtrahend = subtrahend; + } + + @Override + public int start() { + return minuend.start(); + } + + @Override + public int end() { + return minuend.end(); + } + + @Override + public int innerWidth() { + return minuend.innerWidth(); + } + + @Override + public boolean reset(int doc) throws IOException { + subPositioned = subtrahend.reset(doc); + if (subPositioned) + subPositioned = subtrahend.nextInterval() != NO_MORE_INTERVALS; + return minuend.reset(doc); + } + + @Override + public int nextInterval() throws IOException { + if (subPositioned == false) + return minuend.nextInterval(); + while (minuend.nextInterval() != NO_MORE_INTERVALS) { + while (subtrahend.end() < minuend.start()) { + if (subtrahend.nextInterval() == NO_MORE_INTERVALS) { + subPositioned = false; + return minuend.start(); + } + } + if (subtrahend.start() > minuend.end()) + return minuend.start(); + } + return NO_MORE_INTERVALS; + } + } + public static class NotWithinFunction extends IntervalDifferenceFunction { private final int positions; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index b2b930db570e..4f33a85861f4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -21,6 +21,26 @@ public abstract class IntervalFilter implements IntervalIterator { + public static IntervalIterator widthFilter(IntervalIterator in, int minWidth, int maxWidth) { + return new IntervalFilter(in) { + @Override + protected boolean accept() { + int width = end() - start(); + return width >= minWidth && width <= maxWidth; + } + }; + } + + public static IntervalIterator innerWidthFilter(IntervalIterator in, int minWidth, int maxWidth) { + return new 
IntervalFilter(in) { + @Override + protected boolean accept() { + int width = innerWidth(); + return width >= minWidth && width <= maxWidth; + } + }; + } + private final IntervalIterator in; public IntervalFilter(IntervalIterator in) { diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 87cca4ba8b1a..5a94a25d0a33 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -22,6 +22,10 @@ import java.util.List; import java.util.Objects; +import org.apache.lucene.util.PriorityQueue; + +import static org.apache.lucene.search.Intervals.NO_MORE_INTERVALS; + public abstract class IntervalFunction { @Override @@ -38,7 +42,7 @@ public abstract class IntervalFunction { public static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { @Override public IntervalIterator apply(List intervalIterators) { - return Intervals.orderedIntervalIterator(intervalIterators); + return orderedIntervalIterator(intervalIterators); } }; @@ -54,7 +58,7 @@ public OrderedNearFunction(int minWidth, int maxWidth) { @Override public IntervalIterator apply(List intervalIterators) { - return Intervals.innerWidthFilter(Intervals.orderedIntervalIterator(intervalIterators), minWidth, maxWidth); + return IntervalFilter.innerWidthFilter(orderedIntervalIterator(intervalIterators), minWidth, maxWidth); } @Override @@ -77,10 +81,87 @@ public int hashCode() { } } + public static IntervalIterator orderedIntervalIterator(List subIterators) { + for (IntervalIterator it : subIterators) { + if (it == IntervalIterator.EMPTY) + return IntervalIterator.EMPTY; + } + return new OrderedIntervalIterator(subIterators); + } + + private static class OrderedIntervalIterator implements IntervalIterator { + + final List subIntervals; + + int start; + int end; + int innerWidth; + int i; + + private 
OrderedIntervalIterator(List subIntervals) { + this.subIntervals = subIntervals; + } + + @Override + public int start() { + return start; + } + + @Override + public int end() { + return end; + } + + @Override + public int innerWidth() { + return innerWidth; + } + + @Override + public boolean reset(int doc) throws IOException { + boolean positioned = true; + for (IntervalIterator it : subIntervals) { + positioned &= it.reset(doc); + } + subIntervals.get(0).nextInterval(); + i = 1; + start = end = innerWidth = Integer.MIN_VALUE; + return positioned; + } + + @Override + public int nextInterval() throws IOException { + start = end = NO_MORE_INTERVALS; + int b = Integer.MAX_VALUE; + while (true) { + while (true) { + if (subIntervals.get(i - 1).end() >= b) + return start; + if (i == subIntervals.size() || subIntervals.get(i).start() > subIntervals.get(i - 1).end()) + break; + do { + if (subIntervals.get(i).end() >= b || subIntervals.get(i).nextInterval() == NO_MORE_INTERVALS) + return start; + } + while (subIntervals.get(i).start() <= subIntervals.get(i - 1).end()); + i++; + } + start = subIntervals.get(0).start(); + end = subIntervals.get(subIntervals.size() - 1).end(); + b = subIntervals.get(subIntervals.size() - 1).start(); + innerWidth = b - subIntervals.get(0).end() - 1; + i = 1; + if (subIntervals.get(0).nextInterval() == NO_MORE_INTERVALS) + return start; + } + } + } + + public static final IntervalFunction UNORDERED = new SingletonFunction("UNORDERED") { @Override public IntervalIterator apply(List intervalIterators) { - return Intervals.unorderedIntervalIterator(intervalIterators); + return unorderedIntervalIterator(intervalIterators); } }; @@ -96,7 +177,7 @@ public UnorderedNearFunction(int minWidth, int maxWidth) { @Override public IntervalIterator apply(List intervalIterators) { - return Intervals.innerWidthFilter(Intervals.unorderedIntervalIterator(intervalIterators), minWidth, maxWidth); + return 
IntervalFilter.innerWidthFilter(unorderedIntervalIterator(intervalIterators), minWidth, maxWidth); } @Override @@ -120,6 +201,103 @@ public int hashCode() { } } + public static IntervalIterator unorderedIntervalIterator(List subIntervals) { + for (IntervalIterator it : subIntervals) { + if (it == IntervalIterator.EMPTY) + return IntervalIterator.EMPTY; + } + return new UnorderedIntervalIterator(subIntervals); + } + + private static class UnorderedIntervalIterator implements IntervalIterator { + + private final PriorityQueue queue; + private final IntervalIterator[] subIterators; + + int start, end, innerStart, innerEnd, queueEnd; + + UnorderedIntervalIterator(List subIterators) { + this.queue = new PriorityQueue(subIterators.size()) { + @Override + protected boolean lessThan(IntervalIterator a, IntervalIterator b) { + return a.start() < b.start() || (a.start() == b.start() && a.end() >= b.end()); + } + }; + this.subIterators = new IntervalIterator[subIterators.size()]; + + for (int i = 0; i < subIterators.size(); i++) { + this.subIterators[i] = subIterators.get(i); + } + } + + @Override + public int start() { + return start; + } + + @Override + public int end() { + return end; + } + + @Override + public int innerWidth() { + return innerEnd - innerStart + 1; + } + + @Override + public boolean reset(int doc) throws IOException { + this.queue.clear(); + this.queueEnd = start = end = innerEnd = innerStart = -1; + boolean positioned = true; + for (IntervalIterator subIterator : subIterators) { + positioned &= subIterator.reset(doc); + subIterator.nextInterval(); + queue.add(subIterator); + if (subIterator.end() > queueEnd) { + queueEnd = subIterator.end(); + innerEnd = subIterator.start(); + } + } + return positioned; + } + + void updateRightExtreme(IntervalIterator it) { + int itEnd = it.end(); + if (itEnd > queueEnd) { + queueEnd = itEnd; + innerEnd = it.start(); + } + } + + @Override + public int nextInterval() throws IOException { + while (this.queue.size() == 
subIterators.length && queue.top().start() == start) { + IntervalIterator it = queue.pop(); + if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { + queue.add(it); + updateRightExtreme(it); + } + } + if (this.queue.size() < subIterators.length) + return NO_MORE_INTERVALS; + do { + start = queue.top().start(); + innerStart = queue.top().end(); + end = queueEnd; + if (queue.top().end() == end) + return start; + IntervalIterator it = queue.pop(); + if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { + queue.add(it); + updateRightExtreme(it); + } + } while (this.queue.size() == subIterators.length && end == queueEnd); + return start; + } + + } + private static abstract class SingletonFunction extends IntervalFunction { private final String name; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index 6903626257b2..dfab7da1e3c7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -33,30 +33,6 @@ public final class IntervalQuery extends Query { - public static IntervalQuery ordered(String field, int width, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(0, width)); - } - - public static IntervalQuery ordered(String field, int minWidth, int maxWidth, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(minWidth, maxWidth)); - } - - public static IntervalQuery ordered(String field, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.ORDERED); - } - - public static IntervalQuery unordered(String field, int width, Query... 
subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(0, width)); - } - - public static IntervalQuery unordered(String field, int minWidth, int maxWidth, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(minWidth, maxWidth)); - } - - public static IntervalQuery unordered(String field, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.UNORDERED); - } - private final String field; private final List subQueries; private final IntervalFunction iteratorFunction; diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index e8e16744fe07..ab12bad0d655 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -18,6 +18,7 @@ package org.apache.lucene.search; import java.io.IOException; +import java.util.Arrays; import java.util.List; import org.apache.lucene.index.PostingsEnum; @@ -27,309 +28,37 @@ public final class Intervals { public static final int NO_MORE_INTERVALS = Integer.MAX_VALUE; - public static IntervalIterator widthFilter(IntervalIterator in, int minWidth, int maxWidth) { - return new IntervalFilter(in) { - @Override - protected boolean accept() { - int width = end() - start(); - return width >= minWidth && width <= maxWidth; - } - }; + public static Query orderedQuery(String field, int width, Query... 
subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(0, width)); } - public static IntervalIterator innerWidthFilter(IntervalIterator in, int minWidth, int maxWidth) { - return new IntervalFilter(in) { - @Override - protected boolean accept() { - int width = innerWidth(); - return width >= minWidth && width <= maxWidth; - } - }; + public static Query orderedQuery(String field, int minWidth, int maxWidth, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(minWidth, maxWidth)); } - public static IntervalIterator termIterator(PostingsEnum pe) { - return new TermIntervalIterator(pe); + public static Query orderedQuery(String field, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.ORDERED); } - private static class TermIntervalIterator implements IntervalIterator { - - public TermIntervalIterator(PostingsEnum pe) { - this.pe = pe; - } - - private final PostingsEnum pe; - - int upTo = -1; - int pos = -1; - - @Override - public int start() { - return pos; - } - - @Override - public int end() { - return pos; - } - - @Override - public int innerWidth() { - return 0; - } - - @Override - public boolean reset(int doc) throws IOException { - if (pe.docID() == doc) { - upTo = pe.freq(); - pos = -1; - return true; - } - upTo = -1; - return false; - } - - @Override - public int nextInterval() throws IOException { - if (upTo <= 0) { - return pos = NO_MORE_INTERVALS; - } - upTo--; - return pos = pe.nextPosition(); - } - - @Override - public String toString() { - return pe.docID() + "[" + pos + "]"; - } + public static Query unorderedQuery(String field, int width, Query... 
subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(0, width)); } - public static IntervalIterator orderedIntervalIterator(List subIterators) { - for (IntervalIterator it : subIterators) { - if (it == IntervalIterator.EMPTY) - return IntervalIterator.EMPTY; - } - return new OrderedIntervalIterator(subIterators); - } - - private static class OrderedIntervalIterator implements IntervalIterator { - - final List subIntervals; - - int start; - int end; - int innerWidth; - int i; - - private OrderedIntervalIterator(List subIntervals) { - this.subIntervals = subIntervals; - } - - @Override - public int start() { - return start; - } - - @Override - public int end() { - return end; - } - - @Override - public int innerWidth() { - return innerWidth; - } - - @Override - public boolean reset(int doc) throws IOException { - boolean positioned = true; - for (IntervalIterator it : subIntervals) { - positioned &= it.reset(doc); - } - subIntervals.get(0).nextInterval(); - i = 1; - start = end = innerWidth = Integer.MIN_VALUE; - return positioned; - } - - @Override - public int nextInterval() throws IOException { - start = end = NO_MORE_INTERVALS; - int b = Integer.MAX_VALUE; - while (true) { - while (true) { - if (subIntervals.get(i - 1).end() >= b) - return start; - if (i == subIntervals.size() || subIntervals.get(i).start() > subIntervals.get(i - 1).end()) - break; - do { - if (subIntervals.get(i).end() >= b || subIntervals.get(i).nextInterval() == NO_MORE_INTERVALS) - return start; - } - while (subIntervals.get(i).start() <= subIntervals.get(i - 1).end()); - i++; - } - start = subIntervals.get(0).start(); - end = subIntervals.get(subIntervals.size() - 1).end(); - b = subIntervals.get(subIntervals.size() - 1).start(); - innerWidth = b - subIntervals.get(0).end() - 1; - i = 1; - if (subIntervals.get(0).nextInterval() == NO_MORE_INTERVALS) - return start; - } - } + public static Query unorderedQuery(String field, int 
minWidth, int maxWidth, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(minWidth, maxWidth)); } - public static IntervalIterator unorderedIntervalIterator(List subIntervals) { - for (IntervalIterator it : subIntervals) { - if (it == IntervalIterator.EMPTY) - return IntervalIterator.EMPTY; - } - return new UnorderedIntervalIterator(subIntervals); + public static Query unorderedQuery(String field, Query... subQueries) { + return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.UNORDERED); } - private static class UnorderedIntervalIterator implements IntervalIterator { - - private final PriorityQueue queue; - private final IntervalIterator[] subIterators; - - int start, end, innerStart, innerEnd, queueEnd; - - UnorderedIntervalIterator(List subIterators) { - this.queue = new PriorityQueue(subIterators.size()) { - @Override - protected boolean lessThan(IntervalIterator a, IntervalIterator b) { - return a.start() < b.start() || (a.start() == b.start() && a.end() >= b.end()); - } - }; - this.subIterators = new IntervalIterator[subIterators.size()]; - - for (int i = 0; i < subIterators.size(); i++) { - this.subIterators[i] = subIterators.get(i); - } - } - - @Override - public int start() { - return start; - } - - @Override - public int end() { - return end; - } - - @Override - public int innerWidth() { - return innerEnd - innerStart + 1; - } - - @Override - public boolean reset(int doc) throws IOException { - this.queue.clear(); - this.queueEnd = start = end = innerEnd = innerStart = -1; - boolean positioned = true; - for (IntervalIterator subIterator : subIterators) { - positioned &= subIterator.reset(doc); - subIterator.nextInterval(); - queue.add(subIterator); - if (subIterator.end() > queueEnd) { - queueEnd = subIterator.end(); - innerEnd = subIterator.start(); - } - } - return positioned; - } - - void updateRightExtreme(IntervalIterator it) { - int itEnd = it.end(); - 
if (itEnd > queueEnd) { - queueEnd = itEnd; - innerEnd = it.start(); - } - } - - @Override - public int nextInterval() throws IOException { - while (this.queue.size() == subIterators.length && queue.top().start() == start) { - IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != Intervals.NO_MORE_INTERVALS) { - queue.add(it); - updateRightExtreme(it); - } - } - if (this.queue.size() < subIterators.length) - return NO_MORE_INTERVALS; - do { - start = queue.top().start(); - innerStart = queue.top().end(); - end = queueEnd; - if (queue.top().end() == end) - return start; - IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != Intervals.NO_MORE_INTERVALS) { - queue.add(it); - updateRightExtreme(it); - } - } while (this.queue.size() == subIterators.length && end == queueEnd); - return start; - } - + public static Query nonOverlappingQuery(String field, Query minuend, Query subtrahend) { + return new ContainingIntervalQuery(field, minuend, subtrahend, IntervalDifferenceFunction.NON_OVERLAPPING); } - public static IntervalIterator nonOverlapping(IntervalIterator minuend, IntervalIterator subtrahend) { - return new NonOverlappingIterator(minuend, subtrahend); + public static Query notWithinQuery(String field, Query minuend, int positions, Query subtrahend) { + return new ContainingIntervalQuery(field, minuend, subtrahend, new IntervalDifferenceFunction.NotWithinFunction(positions)); } - private static class NonOverlappingIterator implements IntervalIterator { - - final IntervalIterator minuend; - final IntervalIterator subtrahend; - boolean subPositioned; - - private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtrahend) { - this.minuend = minuend; - this.subtrahend = subtrahend; - } - - @Override - public int start() { - return minuend.start(); - } - - @Override - public int end() { - return minuend.end(); - } - - @Override - public int innerWidth() { - return minuend.innerWidth(); - } - - @Override - 
public boolean reset(int doc) throws IOException { - subPositioned = subtrahend.reset(doc); - if (subPositioned) - subPositioned = subtrahend.nextInterval() != NO_MORE_INTERVALS; - return minuend.reset(doc); - } - - @Override - public int nextInterval() throws IOException { - if (subPositioned == false) - return minuend.nextInterval(); - while (minuend.nextInterval() != NO_MORE_INTERVALS) { - while (subtrahend.end() < minuend.start()) { - if (subtrahend.nextInterval() == NO_MORE_INTERVALS) { - subPositioned = false; - return minuend.start(); - } - } - if (subtrahend.start() > minuend.end()) - return minuend.start(); - } - return NO_MORE_INTERVALS; - } - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 9d2c7192fc4d..ef9eeeabc026 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -24,6 +24,8 @@ import org.apache.lucene.index.SlowImpactsEnum; import org.apache.lucene.index.TermsEnum; +import static org.apache.lucene.search.Intervals.NO_MORE_INTERVALS; + /** Expert: A Scorer for documents matching a Term. */ final class TermScorer extends Scorer { @@ -130,7 +132,7 @@ public DocIdSetIterator iterator() { @Override public IntervalIterator intervals(String field) { if (this.field.equals(field)) { - return Intervals.termIterator(postingsEnum); + return new TermIntervalIterator(postingsEnum); } return null; } @@ -159,4 +161,56 @@ public void setMinCompetitiveScore(float minScore) { /** Returns a string representation of this TermScorer. 
*/ @Override public String toString() { return "scorer(" + weight + ")[" + super.toString() + "]"; } + + private static class TermIntervalIterator implements IntervalIterator { + + public TermIntervalIterator(PostingsEnum pe) { + this.pe = pe; + } + + private final PostingsEnum pe; + + int upTo = -1; + int pos = -1; + + @Override + public int start() { + return pos; + } + + @Override + public int end() { + return pos; + } + + @Override + public int innerWidth() { + return 0; + } + + @Override + public boolean reset(int doc) throws IOException { + if (pe.docID() == doc) { + upTo = pe.freq(); + pos = -1; + return true; + } + upTo = -1; + return false; + } + + @Override + public int nextInterval() throws IOException { + if (upTo <= 0) { + return pos = NO_MORE_INTERVALS; + } + upTo--; + return pos = pe.nextPosition(); + } + + @Override + public String toString() { + return pe.docID() + "[" + pos + "]"; + } + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 110b03df3864..fa30610d7875 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -73,7 +73,7 @@ private void checkHits(Query query, int[] results) throws IOException { public void testScoring() throws IOException { PhraseQuery pq = new PhraseQuery.Builder().add(new Term(field, "w2")).add(new Term(field, "w3")).build(); - Query equiv = IntervalQuery.ordered(field, 0, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))); + Query equiv = Intervals.orderedQuery(field, 0, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))); TopDocs td1 = searcher.search(pq, 10); TopDocs td2 = searcher.search(equiv, 10); @@ -85,28 +85,28 @@ public void testScoring() throws IOException { } public void testOrderedNearQueryWidth0() throws IOException { - 
checkHits(IntervalQuery.ordered(field, 0, new TermQuery(new Term(field, "w1")), + checkHits(Intervals.orderedQuery(field, 0, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new int[]{0}); } public void testOrderedNearQueryWidth1() throws IOException { - checkHits(IntervalQuery.ordered(field, 1, new TermQuery(new Term(field, "w1")), + checkHits(Intervals.orderedQuery(field, 1, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new int[]{0, 1, 2, 5}); } public void testOrderedNearQueryWidth2() throws IOException { - checkHits(IntervalQuery.ordered(field, 2, new TermQuery(new Term(field, "w1")), + checkHits(Intervals.orderedQuery(field, 2, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new int[]{0, 1, 2, 3, 5}); } public void testNestedOrderedNearQuery() throws IOException { // onear/1(w1, onear/2(w2, w3)) - Query q = IntervalQuery.ordered(field, 1, + Query q = Intervals.orderedQuery(field, 1, new TermQuery(new Term(field, "w1")), - IntervalQuery.ordered(field, 2, + Intervals.orderedQuery(field, 2, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))) ); @@ -115,20 +115,20 @@ public void testNestedOrderedNearQuery() throws IOException { } public void testUnorderedQuery() throws IOException { - Query q = IntervalQuery.unordered(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))); + Query q = Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))); checkHits(q, new int[]{0, 1, 2, 3, 5}); } public void testNotContainingQuery() throws IOException { - Query q = ContainingIntervalQuery.nonOverlapping(field, - IntervalQuery.unordered(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), + Query q = Intervals.nonOverlappingQuery(field, + Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), new 
TermQuery(new Term(field, "w3"))); checkHits(q, new int[]{0, 2, 4, 5}); } public void testNotWithinQuery() throws IOException { - Query q = ContainingIntervalQuery.notWithin(field, new TermQuery(new Term(field, "w1")), 1, + Query q = Intervals.notWithinQuery(field, new TermQuery(new Term(field, "w1")), 1, new TermQuery(new Term(field, "w2"))); checkHits(q, new int[]{ 1, 2, 3 }); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 1c2be22c840a..e6169823c829 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -126,7 +126,7 @@ public void testTermQueryIntervals() throws IOException { } public void testOrderedNearIntervals() throws IOException { - checkIntervals(IntervalQuery.ordered("field1", 100, + checkIntervals(Intervals.orderedQuery("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), "field1", 3, new int[][]{ {}, @@ -139,7 +139,7 @@ public void testOrderedNearIntervals() throws IOException { } public void testUnorderedNearIntervals() throws IOException { - checkIntervals(IntervalQuery.unordered("field1", 100, + checkIntervals(Intervals.unorderedQuery("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), "field1", 4, new int[][]{ {}, @@ -166,7 +166,7 @@ public void testIntervalDisjunction() throws IOException { } public void testNesting() throws IOException { - checkIntervals(IntervalQuery.unordered("field1", 100, + checkIntervals(Intervals.unorderedQuery("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "porridge")), new BooleanQuery.Builder() From c6fa86096ea70180d4614c0cc946978fe8b3d91f Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Sun, 25 Feb 2018 12:00:12 +0000 Subject: [PATCH 49/83] Tests for containing/contained_by 
queries --- ...n.java => DifferenceIntervalFunction.java} | 130 +++++++++++++----- ...uery.java => DifferenceIntervalQuery.java} | 12 +- .../lucene/search/IntervalFunction.java | 100 ++++++++++++++ .../apache/lucene/search/IntervalScorer.java | 2 +- .../org/apache/lucene/search/Intervals.java | 24 +++- .../lucene/search/TestIntervalQuery.java | 46 ++++++- 6 files changed, 262 insertions(+), 52 deletions(-) rename lucene/core/src/java/org/apache/lucene/search/{IntervalDifferenceFunction.java => DifferenceIntervalFunction.java} (54%) rename lucene/core/src/java/org/apache/lucene/search/{ContainingIntervalQuery.java => DifferenceIntervalQuery.java} (93%) diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java similarity index 54% rename from lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java rename to lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index 6b5d58c9fe8d..a0a19fa81307 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalDifferenceFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -22,7 +22,7 @@ import static org.apache.lucene.search.Intervals.NO_MORE_INTERVALS; -public abstract class IntervalDifferenceFunction { +public abstract class DifferenceIntervalFunction { @Override public abstract int hashCode(); @@ -35,70 +35,89 @@ public abstract class IntervalDifferenceFunction { public abstract IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend); - public static final IntervalDifferenceFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { + public static final DifferenceIntervalFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - return nonOverlapping(minuend, subtrahend); 
+ return new NonOverlappingIterator(minuend, subtrahend); } }; - public static IntervalIterator nonOverlapping(IntervalIterator minuend, IntervalIterator subtrahend) { - return new NonOverlappingIterator(minuend, subtrahend); - } + public static final DifferenceIntervalFunction NOT_CONTAINING = new SingletonFunction("NOT_CONTAINING") { + @Override + public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { + return new NotContainingIterator(minuend, subtrahend); + } + }; - private static class NonOverlappingIterator implements IntervalIterator { + public static final DifferenceIntervalFunction NOT_CONTAINED_BY = new SingletonFunction("NOT_CONTAINED_BY") { + @Override + public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { + return new NotContainedByIterator(minuend, subtrahend); + } + }; - final IntervalIterator minuend; - final IntervalIterator subtrahend; - boolean subPositioned; + private static abstract class RelativeIterator implements IntervalIterator { - private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtrahend) { - this.minuend = minuend; - this.subtrahend = subtrahend; + final IntervalIterator a; + final IntervalIterator b; + + boolean bpos; + + RelativeIterator(IntervalIterator a, IntervalIterator b) { + this.a = a; + this.b = b; } @Override public int start() { - return minuend.start(); + return a.start(); } @Override public int end() { - return minuend.end(); + return a.end(); } @Override public int innerWidth() { - return minuend.innerWidth(); + return a.innerWidth(); } @Override public boolean reset(int doc) throws IOException { - subPositioned = subtrahend.reset(doc); - if (subPositioned) - subPositioned = subtrahend.nextInterval() != NO_MORE_INTERVALS; - return minuend.reset(doc); + bpos = b.reset(doc); + if (bpos) + bpos = b.nextInterval() != NO_MORE_INTERVALS; + return a.reset(doc); + } + + } + + private static class NonOverlappingIterator extends 
RelativeIterator { + + private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtrahend) { + super(minuend, subtrahend); } @Override public int nextInterval() throws IOException { - if (subPositioned == false) - return minuend.nextInterval(); - while (minuend.nextInterval() != NO_MORE_INTERVALS) { - while (subtrahend.end() < minuend.start()) { - if (subtrahend.nextInterval() == NO_MORE_INTERVALS) { - subPositioned = false; - return minuend.start(); + if (bpos == false) + return a.nextInterval(); + while (a.nextInterval() != NO_MORE_INTERVALS) { + while (b.end() < a.start()) { + if (b.nextInterval() == NO_MORE_INTERVALS) { + bpos = false; + return a.start(); } } - if (subtrahend.start() > minuend.end()) - return minuend.start(); + if (b.start() > a.end()) + return a.start(); } return NO_MORE_INTERVALS; } } - public static class NotWithinFunction extends IntervalDifferenceFunction { + public static class NotWithinFunction extends DifferenceIntervalFunction { private final int positions; @@ -161,11 +180,58 @@ public int nextInterval() throws IOException { } } - private static abstract class SingletonFunction extends IntervalDifferenceFunction { + private static class NotContainingIterator extends RelativeIterator { + + private NotContainingIterator(IntervalIterator minuend, IntervalIterator subtrahend) { + super(minuend, subtrahend); + } + + @Override + public int nextInterval() throws IOException { + if (bpos == false) + return a.nextInterval(); + while (a.nextInterval() != NO_MORE_INTERVALS) { + while (b.start() < a.start() && b.end() < a.end()) { + if (b.nextInterval() == NO_MORE_INTERVALS) { + bpos = false; + return a.start(); + } + } + if (b.start() > a.end()) + return a.start(); + } + return NO_MORE_INTERVALS; + } + + } + + private static class NotContainedByIterator extends RelativeIterator { + + NotContainedByIterator(IntervalIterator a, IntervalIterator b) { + super(a, b); + } + + @Override + public int nextInterval() throws IOException { 
+ if (bpos == false) + return a.nextInterval(); + while (a.nextInterval() != NO_MORE_INTERVALS) { + while (b.end() < a.end()) { + if (b.nextInterval() == NO_MORE_INTERVALS) + return a.start(); + } + if (a.start() < b.start()) + return a.start(); + } + return NO_MORE_INTERVALS; + } + } + + private static abstract class SingletonFunction extends DifferenceIntervalFunction { private final String name; - protected SingletonFunction(String name) { + SingletonFunction(String name) { this.name = name; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java similarity index 93% rename from lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java rename to lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java index 8d564942e864..a971f909295f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ContainingIntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java @@ -27,14 +27,14 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.Similarity; -public class ContainingIntervalQuery extends Query { +public class DifferenceIntervalQuery extends Query { private final Query minuend; private final Query subtrahend; - private final IntervalDifferenceFunction function; + private final DifferenceIntervalFunction function; private final String field; - protected ContainingIntervalQuery(String field, Query minuend, Query subtrahend, IntervalDifferenceFunction function) { + protected DifferenceIntervalQuery(String field, Query minuend, Query subtrahend, DifferenceIntervalFunction function) { this.minuend = minuend; this.subtrahend = subtrahend; this.function = function; @@ -59,7 +59,7 @@ public Query rewrite(IndexReader reader) throws IOException { Query rewrittenMinuend = minuend.rewrite(reader); Query rewrittenSubtrahend = subtrahend.rewrite(reader); if 
(rewrittenMinuend != minuend || rewrittenSubtrahend != subtrahend) { - return new ContainingIntervalQuery(field, rewrittenMinuend, rewrittenSubtrahend, function); + return new DifferenceIntervalQuery(field, rewrittenMinuend, rewrittenSubtrahend, function); } return this; } @@ -68,7 +68,7 @@ public Query rewrite(IndexReader reader) throws IOException { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - ContainingIntervalQuery that = (ContainingIntervalQuery) o; + DifferenceIntervalQuery that = (DifferenceIntervalQuery) o; return Objects.equals(minuend, that.minuend) && Objects.equals(subtrahend, that.subtrahend) && Objects.equals(function, that.function); @@ -89,7 +89,7 @@ private class IntervalDifferenceWeight extends Weight { private IntervalDifferenceWeight(Weight minuendWeight, Weight subtrahendWeight, ScoreMode scoreMode, Similarity similarity, Similarity.SimScorer simScorer) { - super(ContainingIntervalQuery.this); + super(DifferenceIntervalQuery.this); this.minuendWeight = minuendWeight; this.subtrahendWeight = subtrahendWeight; this.scoreMode = scoreMode; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 5a94a25d0a33..ef5d55b3b638 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -298,6 +298,106 @@ public int nextInterval() throws IOException { } + public static final IntervalFunction CONTAINING = new SingletonFunction("CONTAINING") { + @Override + public IntervalIterator apply(List iterators) { + if (iterators.size() != 2) + throw new IllegalStateException("CONTAINING function requires two iterators"); + IntervalIterator a = iterators.get(0); + IntervalIterator b = iterators.get(1); + return new IntervalIterator() { + + boolean bpos; + + @Override + public int start() { + 
return a.start(); + } + + @Override + public int end() { + return a.end(); + } + + @Override + public int innerWidth() { + return a.innerWidth(); + } + + @Override + public boolean reset(int doc) throws IOException { + bpos = b.reset(doc); + return a.reset(doc); + } + + @Override + public int nextInterval() throws IOException { + if (bpos == false) + return NO_MORE_INTERVALS; + while (a.nextInterval() != NO_MORE_INTERVALS) { + while (b.start() < a.start() && b.end() < a.end()) { + if (b.nextInterval() == NO_MORE_INTERVALS) + return NO_MORE_INTERVALS; + } + if (a.start() <= b.start() && a.end() >= b.end()) + return a.start(); + } + return NO_MORE_INTERVALS; + } + }; + } + }; + + public static final IntervalFunction CONTAINED_BY = new SingletonFunction("CONTAINED_BY") { + @Override + public IntervalIterator apply(List iterators) { + if (iterators.size() != 2) + throw new IllegalStateException("CONTAINED_BY function requires two iterators"); + IntervalIterator a = iterators.get(0); + IntervalIterator b = iterators.get(1); + return new IntervalIterator() { + + boolean bpos; + + @Override + public int start() { + return a.start(); + } + + @Override + public int end() { + return a.end(); + } + + @Override + public int innerWidth() { + return a.innerWidth(); + } + + @Override + public boolean reset(int doc) throws IOException { + bpos = b.reset(doc); + return a.reset(doc); + } + + @Override + public int nextInterval() throws IOException { + if (bpos == false) + return NO_MORE_INTERVALS; + while (a.nextInterval() != NO_MORE_INTERVALS) { + while (b.end() < a.end()) { + if (b.nextInterval() == NO_MORE_INTERVALS) + return NO_MORE_INTERVALS; + } + if (b.start() <= a.start()) + return a.start(); + } + return NO_MORE_INTERVALS; + } + }; + } + }; + private static abstract class SingletonFunction extends IntervalFunction { private final String name; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java 
b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index fb9476350618..9e804dde549e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -99,7 +99,7 @@ public int innerWidth() { public boolean reset(int doc) throws IOException { // inner iterator already reset() in TwoPhaseIterator.matches() started = false; - return true; + return doc == docID(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index ab12bad0d655..43ffb74a9bee 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -17,12 +17,7 @@ package org.apache.lucene.search; -import java.io.IOException; import java.util.Arrays; -import java.util.List; - -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.util.PriorityQueue; public final class Intervals { @@ -53,12 +48,27 @@ public static Query unorderedQuery(String field, Query... 
subQueries) { } public static Query nonOverlappingQuery(String field, Query minuend, Query subtrahend) { - return new ContainingIntervalQuery(field, minuend, subtrahend, IntervalDifferenceFunction.NON_OVERLAPPING); + return new DifferenceIntervalQuery(field, minuend, subtrahend, DifferenceIntervalFunction.NON_OVERLAPPING); } public static Query notWithinQuery(String field, Query minuend, int positions, Query subtrahend) { - return new ContainingIntervalQuery(field, minuend, subtrahend, new IntervalDifferenceFunction.NotWithinFunction(positions)); + return new DifferenceIntervalQuery(field, minuend, subtrahend, new DifferenceIntervalFunction.NotWithinFunction(positions)); + } + + public static Query notContainingQuery(String field, Query minuend, Query subtrahend) { + return new DifferenceIntervalQuery(field, minuend, subtrahend, DifferenceIntervalFunction.NOT_CONTAINING); + } + + public static Query containingQuery(String field, Query big, Query small) { + return new IntervalQuery(field, Arrays.asList(big, small), IntervalFunction.CONTAINING); + } + + public static Query notContainedByQuery(String field, Query small, Query big) { + return new DifferenceIntervalQuery(field, small, big, DifferenceIntervalFunction.NOT_CONTAINED_BY); } + public static Query containedByQuery(String field, Query small, Query big) { + return new IntervalQuery(field, Arrays.asList(small, big), IntervalFunction.CONTAINED_BY); + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index fa30610d7875..8de0afa3ac9a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -61,10 +61,10 @@ public void tearDown() throws Exception { private String[] docFields = { "w1 w2 w3 w4 w5", "w1 w3 w2 w3", - "w1 xx w2 yy w3", + "w1 xx w2 w4 yy w3", "w1 w3 xx w2 yy w3", "w2 w1", - "w2 w1 w3 w2" + "w2 w1 w3 
w2 w4" }; private void checkHits(Query query, int[] results) throws IOException { @@ -119,12 +119,12 @@ public void testUnorderedQuery() throws IOException { checkHits(q, new int[]{0, 1, 2, 3, 5}); } - public void testNotContainingQuery() throws IOException { + public void testNonOverlappingQuery() throws IOException { Query q = Intervals.nonOverlappingQuery(field, - Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), - new TermQuery(new Term(field, "w3"))); + Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))), + Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w4")))); - checkHits(q, new int[]{0, 2, 4, 5}); + checkHits(q, new int[]{1, 3, 5}); } public void testNotWithinQuery() throws IOException { @@ -133,4 +133,38 @@ public void testNotWithinQuery() throws IOException { checkHits(q, new int[]{ 1, 2, 3 }); } + public void testNotContainingQuery() throws IOException { + Query q = Intervals.notContainingQuery(field, + Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), + new TermQuery(new Term(field, "w3"))); + + checkHits(q, new int[]{ 0, 2, 4, 5 }); + } + + public void testContainingQuery() throws IOException { + Query q = Intervals.containingQuery(field, + Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), + new TermQuery(new Term(field, "w3"))); + + checkHits(q, new int[]{ 1, 3, 5 }); + } + + public void testContainedByQuery() throws IOException { + Query q = Intervals.containedByQuery(field, + new TermQuery(new Term(field, "w3")), + Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2")))); + checkHits(q, new int[]{ 1, 3, 5 }); + } + + public void testNotContainedByQuery() throws IOException { + Query q = 
Intervals.notContainedByQuery(field, + new TermQuery(new Term(field, "w2")), + Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w4")))); + checkHits(q, new int[]{ 1, 3, 4, 5 }); + } + // contained-by + // not-contained-by + + // TODO: Overlapping + } From b2b11106dc40a2cdc582687104a22ca024cac904 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 27 Feb 2018 16:50:48 +0000 Subject: [PATCH 50/83] Add intervals to exact phrase scorer --- .../lucene/search/CachedIntervalIterator.java | 64 +++++++++++++ .../lucene/search/ExactPhraseScorer.java | 91 ++++++++++++++++++- .../apache/lucene/search/IntervalScorer.java | 35 +------ .../lucene/search/MultiPhraseQuery.java | 2 +- .../org/apache/lucene/search/PhraseQuery.java | 2 +- .../lucene/search/TestIntervalQuery.java | 7 ++ .../apache/lucene/search/TestIntervals.java | 13 +++ 7 files changed, 174 insertions(+), 40 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java diff --git a/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java new file mode 100644 index 000000000000..e035d33594d2 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; + +class CachedIntervalIterator implements IntervalIterator { + + final IntervalIterator in; + final Scorer scorer; + + boolean started = false; + + CachedIntervalIterator(IntervalIterator in, Scorer scorer) { + this.in = in; + this.scorer = scorer; + } + + @Override + public int start() { + return in.start(); + } + + @Override + public int end() { + return in.end(); + } + + @Override + public int innerWidth() { + return in.innerWidth(); + } + + @Override + public boolean reset(int doc) throws IOException { + // inner iterator already reset() in TwoPhaseIterator.matches() + started = false; + return doc == scorer.docID(); + } + + @Override + public int nextInterval() throws IOException { + if (started == false) { + started = true; + return start(); + } + return in.nextInterval(); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index d68f8557dd10..5aec1e64ff62 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -38,6 +38,7 @@ public PostingsAndPosition(PostingsEnum postings, int offset) { private final DocIdSetIterator conjunction; private final PostingsAndPosition[] postings; + private final String field; private int freq; @@ -46,13 +47,17 @@ public PostingsAndPosition(PostingsEnum postings, int offset) { private float matchCost; private float minCompetitiveScore; 
- ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, + private final IntervalIterator intervals; + + ExactPhraseScorer(Weight weight, String field, PhraseQuery.PostingsAndFreq[] postings, LeafSimScorer docScorer, ScoreMode scoreMode, float matchCost) throws IOException { super(weight); this.docScorer = docScorer; this.needsScores = scoreMode.needsScores(); this.needsTotalHitCount = scoreMode != ScoreMode.TOP_SCORES; + this.field = field; + this.intervals = new ExactPhraseIntervals(); List iterators = new ArrayList<>(); List postingsAndPositions = new ArrayList<>(); @@ -86,7 +91,9 @@ public boolean matches() throws IOException { return false; } } - return phraseFreq() > 0; + freq = -1; + intervals.reset(docID()); + return intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; } @Override @@ -106,7 +113,8 @@ public String toString() { return "ExactPhraseScorer(" + weight + ")"; } - final int freq() { + final int freq() throws IOException { + ensureFreq(); return freq; } @@ -117,6 +125,7 @@ public int docID() { @Override public float score() throws IOException { + ensureFreq(); return docScorer.score(docID(), freq); } @@ -127,7 +136,18 @@ public float getMaxScore(int upTo) throws IOException { @Override public IntervalIterator intervals(String field) { - return null; // nocommit + if (this.field.equals(field) == false) + return null; + return new CachedIntervalIterator(intervals, this); + } + + private void ensureFreq() throws IOException { + if (freq == -1) { + freq = 1; + while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS) { + freq++; + } + } } /** Advance the given pos enum to the first doc on or after {@code target}. 
@@ -145,6 +165,69 @@ private static boolean advancePosition(PostingsAndPosition posting, int target) return true; } + private class ExactPhraseIntervals implements IntervalIterator { + + @Override + public int start() { + return postings[0].pos; + } + + @Override + public int end() { + return postings[postings.length - 1].pos; + } + + @Override + public int innerWidth() { + return 0; + } + + @Override + public boolean reset(int doc) throws IOException { + if (conjunction.docID() != doc) + return false; + for (PostingsAndPosition posting : postings) { + posting.freq = posting.postings.freq(); + posting.pos = -1; + posting.upTo = 0; + } + return true; + } + + @Override + public int nextInterval() throws IOException { + final PostingsAndPosition lead = postings[0]; + if (lead.upTo == lead.freq) + return Intervals.NO_MORE_INTERVALS; + + lead.pos = lead.postings.nextPosition(); + lead.upTo += 1; + + advanceHead: + while (true) { + final int phrasePos = lead.pos - lead.offset; + for (int j = 1; j < postings.length; ++j) { + final PostingsAndPosition posting = postings[j]; + final int expectedPos = phrasePos + posting.offset; + + // advance up to the same position as the lead + if (advancePosition(posting, expectedPos) == false) { + return Intervals.NO_MORE_INTERVALS; + } + + if (posting.pos != expectedPos) { // we advanced too far + if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) { + continue advanceHead; + } else { + return Intervals.NO_MORE_INTERVALS; + } + } + } + return lead.pos; + } + } + } + private int phraseFreq() throws IOException { // reset state final PostingsAndPosition[] postings = this.postings; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index 9e804dde549e..2ac445efb4f4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -77,40 
+77,7 @@ private void ensureFreq() throws IOException { @Override public IntervalIterator intervals(String field) { if (this.field.equals(field)) - return new IntervalIterator() { - boolean started = false; - - @Override - public int start() { - return intervals.start(); - } - - @Override - public int end() { - return intervals.end(); - } - - @Override - public int innerWidth() { - return intervals.innerWidth(); - } - - @Override - public boolean reset(int doc) throws IOException { - // inner iterator already reset() in TwoPhaseIterator.matches() - started = false; - return doc == docID(); - } - - @Override - public int nextInterval() throws IOException { - if (started == false) { - started = true; - return start(); - } - return intervals.nextInterval(); - } - }; + return new CachedIntervalIterator(intervals, this); return null; } diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 2b6bde8a1daa..cae6cb3c67e9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -292,7 +292,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { } if (slop == 0) { - return new ExactPhraseScorer(this, postingsFreqs, + return new ExactPhraseScorer(this, field, postingsFreqs, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE), scoreMode, totalMatchCost); } else { diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index 360b0175061e..4e0fb43f37e8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -435,7 +435,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { } if (slop == 0) { // optimize exact case - return new 
ExactPhraseScorer(this, postingsFreqs, + return new ExactPhraseScorer(this, field, postingsFreqs, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE), scoreMode, totalMatchCost); } else { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 8de0afa3ac9a..23aa37f655b3 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -114,6 +114,13 @@ public void testNestedOrderedNearQuery() throws IOException { checkHits(q, new int[]{0, 1, 2}); } + public void testNearPhraseQuery() throws IOException { + Query q = Intervals.unorderedQuery(field, + new PhraseQuery.Builder().add(new Term(field, "w3")).add(new Term(field, "w2")).build(), + new TermQuery(new Term(field, "w4"))); + checkHits(q, new int[]{ 5 }); + } + public void testUnorderedQuery() throws IOException { Query q = Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))); checkHits(q, new int[]{0, 1, 2, 3, 5}); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index e6169823c829..8dc190c9e8cd 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -125,6 +125,19 @@ public void testTermQueryIntervals() throws IOException { }); } + public void testExactPhraseQueryIntervals() throws IOException { + checkIntervals(new PhraseQuery.Builder() + .add(new Term("field1", "pease")) + .add(new Term("field1", "porridge")).build(), "field1", 3, new int[][]{ + {}, + { 0, 1, 3, 4, 6, 7 }, + { 0, 1, 3, 4, 6, 7 }, + {}, + { 0, 1, 3, 4, 6, 7 }, + {} + }); + } + public void testOrderedNearIntervals() throws IOException { 
checkIntervals(Intervals.orderedQuery("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), From bb1e8783dc1e7e38548292daec1fe02cd6596e64 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 27 Feb 2018 18:39:58 +0000 Subject: [PATCH 51/83] Add intervals to sloppy phrase scorer --- .../lucene/search/MultiPhraseQuery.java | 2 +- .../apache/lucene/search/PhrasePositions.java | 6 +- .../org/apache/lucene/search/PhraseQuery.java | 2 +- .../lucene/search/SloppyPhraseScorer.java | 110 ++++++++++++++++-- .../lucene/search/TestIntervalQuery.java | 7 ++ .../apache/lucene/search/TestIntervals.java | 17 +++ 6 files changed, 133 insertions(+), 11 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index cae6cb3c67e9..7df670d73f19 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -296,7 +296,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE), scoreMode, totalMatchCost); } else { - return new SloppyPhraseScorer(this, postingsFreqs, slop, + return new SloppyPhraseScorer(this, field, postingsFreqs, slop, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE), scoreMode.needsScores(), totalMatchCost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java index 640cd5f20e45..d39cec2293e9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java @@ -24,7 +24,8 @@ * Position of a term in a document that takes into account the term offset within the phrase. 
*/ final class PhrasePositions { - int position; // position in doc + int realPosition; // position in doc + int position; // position in phrase int count; // remaining pos in this doc int offset; // position in phrase final int ord; // unique across all PhrasePositions instances @@ -54,7 +55,8 @@ final void firstPosition() throws IOException { */ final boolean nextPosition() throws IOException { if (count-- > 0) { // read subsequent pos's - position = postings.nextPosition() - offset; + realPosition = postings.nextPosition(); + position = realPosition - offset; return true; } else return false; diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index 4e0fb43f37e8..a4ff6150815d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -439,7 +439,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE), scoreMode, totalMatchCost); } else { - return new SloppyPhraseScorer(this, postingsFreqs, slop, + return new SloppyPhraseScorer(this, field, postingsFreqs, slop, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE), scoreMode.needsScores(), totalMatchCost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index 20a375955410..a103af73fa7e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -32,6 +32,8 @@ final class SloppyPhraseScorer extends Scorer { private final DocIdSetIterator conjunction; private final PhrasePositions[] phrasePositions; + private final IntervalIterator intervals; + private final String field; private float sloppyFreq; 
//phrase frequency in current doc as computed by phraseFreq(). @@ -40,6 +42,8 @@ final class SloppyPhraseScorer extends Scorer { private final int slop; private final int numPostings; private final PhraseQueue pq; // for advancing min position + + private int start, currentEnd, nextEnd; private int end; // current largest phrase position @@ -53,13 +57,15 @@ final class SloppyPhraseScorer extends Scorer { final boolean needsScores; private final float matchCost; - SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, + SloppyPhraseScorer(Weight weight, String field, PhraseQuery.PostingsAndFreq[] postings, int slop, LeafSimScorer docScorer, boolean needsScores, float matchCost) { super(weight); this.docScorer = docScorer; this.needsScores = needsScores; this.slop = slop; + this.field = field; + this.intervals = new SloppyIntervalIterator(); this.numPostings = postings==null ? 0 : postings.length; pq = new PhraseQueue(postings.length); DocIdSetIterator[] iterators = new DocIdSetIterator[postings.length]; @@ -73,6 +79,74 @@ final class SloppyPhraseScorer extends Scorer { this.matchCost = matchCost; } + private class SloppyIntervalIterator implements IntervalIterator { + + @Override + public int start() { + return start; + } + + @Override + public int end() { + return currentEnd; + } + + @Override + public int innerWidth() { + return currentEnd - start; + } + + @Override + public boolean reset(int doc) throws IOException { + start = currentEnd = nextEnd = -1; + return initPhrasePositions(); + } + + @Override + public int nextInterval() throws IOException { + if (pq.size() < phrasePositions.length) + return Intervals.NO_MORE_INTERVALS; + currentEnd = nextEnd; + PhrasePositions pp = pq.pop(); + start = pp.realPosition; + int matchLength = end - pp.position; + int next = pq.top().position; + int nextStart = pq.top().realPosition; + while (advancePP(pp)) { + if (hasRpts && !advanceRpts(pp)) { + break; // pps exhausted + } + if (pp.position > next) { // 
done minimizing current match-length + if (matchLength <= slop) { + pq.add(pp); + if (pp.realPosition > nextEnd) + nextEnd = pp.realPosition; + return start; + } + pq.add(pp); + pp = pq.pop(); + next = pq.top().position; + matchLength = end - pp.position; + } else { + int matchLength2 = end - pp.position; + if (matchLength2 < matchLength) { + matchLength = matchLength2; + } + if (pp.realPosition > nextStart) { + start = nextStart; + } + else { + start = pp.realPosition; + } + } + } + if (matchLength <= slop) { + return start; + } + return Intervals.NO_MORE_INTERVALS; + } + } + /** * Score a candidate doc for all slop-valid position-combinations (matches) * encountered while traversing/hopping the PhrasePositions. @@ -242,6 +316,9 @@ private void initSimple() throws IOException { if (pp.position > end) { end = pp.position; } + if (pp.realPosition > nextEnd) { + nextEnd = pp.realPosition; + } pq.add(pp); } } @@ -271,6 +348,9 @@ private void fillQueue() { if (pp.position > end) { end = pp.position; } + if (pp.realPosition > nextEnd) { + nextEnd = pp.realPosition; + } pq.add(pp); } } @@ -515,11 +595,13 @@ private HashMap termGroups(LinkedHashMap tord, Array return tg; } - int freq() { + int freq() throws IOException { + ensureFreq(); return numMatches; } - float sloppyFreq() { + float sloppyFreq() throws IOException { + ensureFreq(); return sloppyFreq; } @@ -544,7 +626,17 @@ float sloppyFreq() { // } // } - + private void ensureFreq() throws IOException { + if (sloppyFreq == -1) { + numMatches = 1; + sloppyFreq = intervals.score(); + while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS) { + sloppyFreq += intervals.score(); + numMatches++; + } + } + } + @Override public int docID() { return conjunction.docID(); @@ -552,6 +644,7 @@ public int docID() { @Override public float score() throws IOException { + ensureFreq(); return docScorer.score(docID(), sloppyFreq); } @@ -565,7 +658,9 @@ public float getMaxScore(int upTo) throws IOException { @Override public 
IntervalIterator intervals(String field) { - return null; // nocommit. this will be fun + if (this.field.equals(field)) + return new CachedIntervalIterator(intervals, this); + return null; } @Override @@ -573,8 +668,9 @@ public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(conjunction) { @Override public boolean matches() throws IOException { - sloppyFreq = phraseFreq(); // check for phrase - return sloppyFreq != 0F; + sloppyFreq = -1; + intervals.reset(docID()); + return intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; } @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 23aa37f655b3..535d31dd0fa2 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -121,6 +121,13 @@ public void testNearPhraseQuery() throws IOException { checkHits(q, new int[]{ 5 }); } + public void testSloppyPhraseQuery() throws IOException { + Query q = Intervals.unorderedQuery(field, + new PhraseQuery.Builder().add(new Term(field, "w3")).add(new Term(field, "w2")).setSlop(2).build(), + new TermQuery(new Term(field, "w4"))); + checkHits(q, new int[]{ 0, 5 }); + } + public void testUnorderedQuery() throws IOException { Query q = Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))); checkHits(q, new int[]{0, 1, 2, 3, 5}); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 8dc190c9e8cd..d86ac68c774e 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -99,6 +99,7 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i if (intervals.reset(doc)) { int i = 0, pos; while ((pos 
= intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { + //System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); assertEquals(expected[id][i + 1], intervals.end()); @@ -138,6 +139,22 @@ public void testExactPhraseQueryIntervals() throws IOException { }); } + public void testSloppyPhraseQueryIntervals() throws IOException { + checkIntervals(new PhraseQuery.Builder() + .add(new Term("field1", "pease")) + .add(new Term("field1", "porridge")) + .add(new Term("field1", "hot")) + .setSlop(3).build(), "field1", 3, new int[][]{ + {}, + { 0, 2, 1, 3, 2, 4 }, + { 0, 5, 3, 5, 3, 7, 5, 7 }, + {}, + { 0, 2, 1, 3, 2, 4 }, + {} + } + ); + } + public void testOrderedNearIntervals() throws IOException { checkIntervals(Intervals.orderedQuery("field1", 100, new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), From 529bba346abf1bcf7ab06596cfc1ec3d522eb7ac Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 28 Feb 2018 19:28:05 +0000 Subject: [PATCH 52/83] Add test for boolean exclusion combinations --- .../apache/lucene/search/BooleanQuery.java | 2 +- .../lucene/search/CachedIntervalIterator.java | 22 +------ .../search/DisjunctionIntervalIterator.java | 22 ++----- .../lucene/search/DisjunctionScorer.java | 19 ++++-- .../lucene/search/FilterIntervalIterator.java | 59 +++++++++++++++++++ .../apache/lucene/search/ReqExclScorer.java | 9 ++- .../apache/lucene/search/ReqOptSumScorer.java | 8 ++- .../apache/lucene/search/TestIntervals.java | 33 +++++++++-- 8 files changed, 124 insertions(+), 50 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index f52df9fb9cd8..9d89fbed2d86 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -201,7 +201,7 @@ private BooleanQuery rewriteNoScoring() { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { BooleanQuery query = this; - if (scoreMode.needsScores() == false) { + if (scoreMode.needsScores() == false && scoreMode.useQueryCache()) { query = rewriteNoScoring(); } return new BooleanWeight(query, searcher, scoreMode, boost); diff --git a/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java index e035d33594d2..5e5f68a365ea 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java @@ -19,33 +19,17 @@ import java.io.IOException; -class CachedIntervalIterator implements IntervalIterator { +class CachedIntervalIterator extends FilterIntervalIterator { - final IntervalIterator in; final Scorer scorer; - boolean started = false; + private boolean started = false; CachedIntervalIterator(IntervalIterator in, Scorer scorer) { - this.in = in; + super(in); this.scorer = scorer; } - @Override - public int start() { - return in.start(); - } - - @Override - public int end() { - return in.end(); - } - - @Override - public int innerWidth() { - return in.innerWidth(); - } - @Override public boolean reset(int doc) throws IOException { // inner iterator already reset() in TwoPhaseIterator.matches() diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java index a7df0b4d59a3..9cd7f2ba6df4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java @@ -24,23 +24,17 @@ abstract class DisjunctionIntervalIterator implements IntervalIterator { - private final PriorityQueue queue; - private final IntervalIterator[] subIterators; + protected final PriorityQueue queue; IntervalIterator current; - DisjunctionIntervalIterator(List subIterators) { - this.queue = new PriorityQueue(subIterators.size()) { + DisjunctionIntervalIterator(int iteratorCount) { + this.queue = new PriorityQueue(iteratorCount) { @Override protected boolean lessThan(IntervalIterator a, IntervalIterator b) { return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); } }; - this.subIterators = new IntervalIterator[subIterators.size()]; - - for (int i = 0; i < subIterators.size(); i++) { - this.subIterators[i] = subIterators.get(i); - } } @Override @@ -58,18 +52,12 @@ public int innerWidth() { return current.innerWidth(); } - protected abstract void positionSubIntervals() throws IOException; + protected abstract void fillQueue(int doc) throws IOException; @Override public boolean reset(int doc) throws IOException { - positionSubIntervals(); queue.clear(); - for (IntervalIterator subIterator : subIterators) { - if (subIterator.reset(doc)) { - subIterator.nextInterval(); - queue.add(subIterator); - } - } + fillQueue(doc); current = null; return queue.size() > 0; } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index 9e5ab2813fd6..e0dcf8845e39 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -20,7 +20,10 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; +import java.util.IdentityHashMap; import java.util.List; +import java.util.Map; import org.apache.lucene.util.PriorityQueue; @@ 
-182,18 +185,24 @@ public final float score() throws IOException { @Override public IntervalIterator intervals(String field) { - List subIntervals = new ArrayList<>(); + Map subIntervals = new IdentityHashMap<>(); for (DisiWrapper dw : subScorers) { IntervalIterator subIt = dw.scorer.intervals(field); if (subIt != null) - subIntervals.add(subIt); + subIntervals.put(dw, subIt); } if (subIntervals.size() == 0) return null; - return new DisjunctionIntervalIterator(subIntervals) { + return new DisjunctionIntervalIterator(subIntervals.size()) { @Override - protected void positionSubIntervals() throws IOException { - getSubMatches(); + protected void fillQueue(int doc) throws IOException { + for (DisiWrapper dw = getSubMatches(); dw != null; dw = dw.next) { + IntervalIterator it = subIntervals.get(dw); + if (it.reset(doc)) { + it.nextInterval(); + queue.add(it); + } + } } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java new file mode 100644 index 000000000000..4ca9f6e01aa3 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; + +public abstract class FilterIntervalIterator implements IntervalIterator { + + protected final IntervalIterator in; + + protected FilterIntervalIterator(IntervalIterator in) { + this.in = in; + } + + @Override + public int start() { + return in.start(); + } + + @Override + public int end() { + return in.end(); + } + + @Override + public int innerWidth() { + return in.innerWidth(); + } + + @Override + public boolean reset(int doc) throws IOException { + return in.reset(doc); + } + + @Override + public int nextInterval() throws IOException { + return in.nextInterval(); + } + + @Override + public float score() { + return in.score(); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java index aa9108512190..f4f91b22e0db 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java @@ -63,7 +63,14 @@ private static boolean matchesOrNull(TwoPhaseIterator it) throws IOException { @Override public IntervalIterator intervals(String field) { - return reqScorer.intervals(field); + return new FilterIntervalIterator(reqScorer.intervals(field)) { + @Override + public boolean reset(int doc) throws IOException { + if (doc == ReqExclScorer.this.docID()) + return in.reset(doc); + return false; + } + }; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java index 5a502024d781..3069cb1ec2da 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java @@ -192,10 +192,14 @@ public IntervalIterator intervals(String field) { return 
reqIntervals; if (reqIntervals == null) return optIntervals; - return new DisjunctionIntervalIterator(Arrays.asList(reqIntervals, optIntervals)) { + return new DisjunctionIntervalIterator(2) { @Override - protected void positionSubIntervals() throws IOException { + protected void fillQueue(int doc) throws IOException { + reqIntervals.reset(doc); + queue.add(reqIntervals); positionOptionalScorers(); + if (optIntervals.reset(doc)) + queue.add(optIntervals); } }; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index d86ac68c774e..ce59c804653a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -42,9 +42,9 @@ public class TestIntervals extends LuceneTestCase { private static String field1_docs[] = { "Nothing of interest to anyone here", "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. Some like it hot, some like it cold, some like it in the pot nine days old", - "Pease porridge cold, pease porridge hot, pease porridge in the pot nine days old. Some like it cold, some like it hot, some like it in the pot nine days old", + "Pease porridge cold, pease porridge hot, pease porridge in the pot twelve days old. Some like it cold, some like it hot, some like it in the pot", "Nor here, nowt hot going on in pease this one", - "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. Some like it hot, some like it cold", + "Pease porridge hot, pease porridge cold, pease porridge in the pot nine years old. 
Some like it hot, some like it twelve", "Porridge is great" }; @@ -99,7 +99,7 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i if (intervals.reset(doc)) { int i = 0, pos; while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { - //System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); + System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); assertEquals(expected[id][i + 1], intervals.end()); @@ -198,8 +198,8 @@ public void testIntervalDisjunction() throws IOException { public void testNesting() throws IOException { checkIntervals(Intervals.unorderedQuery("field1", 100, new TermQuery(new Term("field1", "pease")), - new TermQuery(new Term("field1", "porridge")), - new BooleanQuery.Builder() + new TermQuery(new Term("field1", "porridge")), + new BooleanQuery.Builder() .add(new TermQuery(new Term("field1", "hot")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("field1", "cold")), BooleanClause.Occur.SHOULD) .build()), "field1", 3, new int[][]{ @@ -211,4 +211,27 @@ public void testNesting() throws IOException { {} }); } + + // x near ((a not b) or (c not d)) + public void testBooleans() throws IOException { + checkIntervals(Intervals.unorderedQuery("field1", + new TermQuery(new Term("field1", "pease")), + new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "nine")), BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("field1", "years")), BooleanClause.Occur.MUST_NOT) + .build(), BooleanClause.Occur.SHOULD) + .add(new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "twelve")), BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("field1", "days")), BooleanClause.Occur.MUST_NOT) + .build(), BooleanClause.Occur.SHOULD) + .build()), "field1", 2, new int[][]{ + {}, + { 6, 11 }, + {}, + {}, + { 6, 21 }, + {} + }); + 
} } From a972bd5952cb774877391a9fbe9419667b82f78e Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 28 Feb 2018 19:30:02 +0000 Subject: [PATCH 53/83] ScoreMode.canUseCache() -> ScoreMode.needsPositions() --- .../src/java/org/apache/lucene/search/BooleanQuery.java | 2 +- .../src/java/org/apache/lucene/search/IndexSearcher.java | 2 +- .../core/src/java/org/apache/lucene/search/ScoreMode.java | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index 9d89fbed2d86..ee89ca11de28 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -201,7 +201,7 @@ private BooleanQuery rewriteNoScoring() { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { BooleanQuery query = this; - if (scoreMode.needsScores() == false && scoreMode.useQueryCache()) { + if (scoreMode.needsScores() == false && scoreMode.needsPositions() == false) { query = rewriteNoScoring(); } return new BooleanWeight(query, searcher, scoreMode, boost); diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index c23d3da347d8..5ab5adec1a84 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -697,7 +697,7 @@ public Weight createNormalizedWeight(Query query, ScoreMode scoreMode) throws IO public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException { final QueryCache queryCache = this.queryCache; Weight weight = query.createWeight(this, scoreMode, boost); - if (scoreMode.useQueryCache() && queryCache != null) { + if (scoreMode.needsPositions() == false && queryCache != null) { weight = 
queryCache.doCache(weight, queryCachingPolicy); } return weight; diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java index 2c014efc31d8..ea3ed9f3648b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java @@ -61,8 +61,8 @@ public boolean needsScores() { } @Override - public boolean useQueryCache() { - return false; + public boolean needsPositions() { + return true; } @Override @@ -94,8 +94,8 @@ public int minRequiredPostings() { public abstract int minRequiredPostings(); - public boolean useQueryCache() { - return !needsScores(); + public boolean needsPositions() { + return needsScores(); } } From c810e696be3e2a49dc3c090b890d4a8f1d073fd5 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 28 Feb 2018 20:12:24 +0000 Subject: [PATCH 54/83] Add intervals to ConjunctionScorer --- .../lucene/search/ConjunctionScorer.java | 21 +++++++++++++++- .../apache/lucene/search/TestIntervals.java | 24 ++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java index f820cd0aaa8e..93e03f3c4177 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.List; /** Scorer for conjunctions, sets of queries, all of which are required. 
*/ class ConjunctionScorer extends Scorer { @@ -103,7 +104,25 @@ public Collection getChildren() { @Override public IntervalIterator intervals(String field) { - return null; // nocommit + List subIntervals = new ArrayList<>(); + for (Scorer scorer : required) { + IntervalIterator it = scorer.intervals(field); + if (it != null) { + subIntervals.add(it); + } + } + if (subIntervals.size() == 0) { + return null; + } + return new DisjunctionIntervalIterator(subIntervals.size()) { + @Override + protected void fillQueue(int doc) throws IOException { + for (IntervalIterator it : subIntervals) { + it.reset(doc); + queue.add(it); + } + } + }; } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index ce59c804653a..2c2ebb48093d 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -213,7 +213,7 @@ public void testNesting() throws IOException { } // x near ((a not b) or (c not d)) - public void testBooleans() throws IOException { + public void testExclusionBooleans() throws IOException { checkIntervals(Intervals.unorderedQuery("field1", new TermQuery(new Term("field1", "pease")), new BooleanQuery.Builder() @@ -234,4 +234,26 @@ public void testBooleans() throws IOException { {} }); } + + public void testConjunctionBooleans() throws IOException { + checkIntervals(Intervals.unorderedQuery("field1", + new TermQuery(new Term("field1", "pease")), + new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "nine")), BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("field2", "caverns")), BooleanClause.Occur.MUST) + .build(), BooleanClause.Occur.SHOULD) + .add(new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "twelve")), BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("field2", "sunless")), BooleanClause.Occur.MUST) + 
.build(), BooleanClause.Occur.SHOULD) + .build()), "field1", 2, new int[][]{ + {}, + { 6, 11 }, + { 6, 11 }, + {}, + {}, + {} + }); + } } From f3d73e49e792ff94bae686f629ec878b039d3544 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 28 Feb 2018 22:17:26 +0000 Subject: [PATCH 55/83] Minimum-should-match --- .../lucene/search/DisjunctionScorer.java | 7 +--- .../lucene/search/IntervalFunction.java | 16 +++++---- .../search/MinShouldMatchSumScorer.java | 33 ++++++++++++++++--- .../apache/lucene/search/TestIntervals.java | 21 +++++++++++- 4 files changed, 59 insertions(+), 18 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index e0dcf8845e39..47d37af01f3a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -130,18 +130,13 @@ public boolean matches() throws IOException { // implicitly verified, move it to verifiedMatches w.next = verifiedMatches; verifiedMatches = w; - - if (needsScores == false) { - // we can stop here - return true; - } } else { unverifiedMatches.add(w); } w = next; } - if (verifiedMatches != null) { + if (verifiedMatches != null || needsScores == false) { return true; } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index ef5d55b3b638..9892caa1c400 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -251,12 +251,16 @@ public boolean reset(int doc) throws IOException { this.queueEnd = start = end = innerEnd = innerStart = -1; boolean positioned = true; for (IntervalIterator subIterator : subIterators) { - positioned &= subIterator.reset(doc); - subIterator.nextInterval(); - queue.add(subIterator); - if 
(subIterator.end() > queueEnd) { - queueEnd = subIterator.end(); - innerEnd = subIterator.start(); + if (subIterator.reset(doc)) { + subIterator.nextInterval(); + queue.add(subIterator); + if (subIterator.end() > queueEnd) { + queueEnd = subIterator.end(); + innerEnd = subIterator.start(); + } + } + else { + positioned = false; } } return positioned; diff --git a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java index 98df563a61ba..fad7d9c99989 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java @@ -20,7 +20,9 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.IdentityHashMap; import java.util.List; +import java.util.Map; import java.util.stream.LongStream; import java.util.stream.StreamSupport; @@ -126,11 +128,6 @@ public final Collection getChildren() throws IOException { return matchingChildren; } - @Override - public IntervalIterator intervals(String field) { - return null; // nocommit - } - @Override public DocIdSetIterator iterator() { return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); @@ -330,6 +327,32 @@ public float score() throws IOException { return (float) score; } + @Override + public IntervalIterator intervals(String field) { + Map its = new IdentityHashMap<>(); + for (DisiWrapper s = lead; s != null; s = s.next) { + IntervalIterator it = s.scorer.intervals(field); + if (it != null) { + its.put(s, it); + } + } + if (its.size() == 0) + return null; + return new DisjunctionIntervalIterator(its.size()) { + @Override + protected void fillQueue(int doc) throws IOException { + updateFreq(); + for (DisiWrapper s = lead; s != null; s = s.next) { + IntervalIterator it = its.get(s); + if (it.reset(doc)) { + it.nextInterval(); + queue.add(it); + } + } + } + }; + } + @Override 
public float getMaxScore(int upTo) throws IOException { // TODO: implement but be careful about floating-point errors. diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 2c2ebb48093d..291494e9787f 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -42,7 +42,7 @@ public class TestIntervals extends LuceneTestCase { private static String field1_docs[] = { "Nothing of interest to anyone here", "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. Some like it hot, some like it cold, some like it in the pot nine days old", - "Pease porridge cold, pease porridge hot, pease porridge in the pot twelve days old. Some like it cold, some like it hot, some like it in the pot", + "Pease porridge cold, pease porridge hot, pease porridge in the pot twelve days old. Some like it cold, some like it hot, some like it in the fraggle", "Nor here, nowt hot going on in pease this one", "Pease porridge hot, pease porridge cold, pease porridge in the pot nine years old. 
Some like it hot, some like it twelve", "Porridge is great" @@ -256,4 +256,23 @@ public void testConjunctionBooleans() throws IOException { {} }); } + + public void testMinimumShouldMatch() throws IOException { + checkIntervals(new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD) + .add(new BooleanQuery.Builder() + .add(new TermQuery(new Term("field1", "porridge")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("field1", "days")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("field1", "fraggle")), BooleanClause.Occur.SHOULD) + .setMinimumNumberShouldMatch(2) + .build(), BooleanClause.Occur.SHOULD) + .build(), "field1", 4, new int[][]{ + {}, + { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 12, 12, 29, 29 }, + { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 12, 12, 27, 27 }, + { 7, 7 }, + { 0, 0, 3, 3, 6, 6 }, + {} + }); + } } From a15b4ca1960ccef0d2cf429344b2adee55667dc3 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 1 Mar 2018 16:29:27 +0000 Subject: [PATCH 56/83] Javadocs --- .../search/DifferenceIntervalFunction.java | 27 +++- .../search/DifferenceIntervalQuery.java | 2 +- .../search/DisjunctionIntervalIterator.java | 5 +- .../lucene/search/ExactPhraseScorer.java | 10 +- .../apache/lucene/search/IntervalFilter.java | 2 +- .../lucene/search/IntervalFunction.java | 43 +++++- .../lucene/search/IntervalIterator.java | 41 ++++- .../apache/lucene/search/IntervalScorer.java | 4 +- .../org/apache/lucene/search/Intervals.java | 146 +++++++++++++++++- .../java/org/apache/lucene/search/Scorer.java | 12 ++ .../lucene/search/SloppyPhraseScorer.java | 8 +- .../org/apache/lucene/search/TermScorer.java | 2 - .../apache/lucene/search/TestIntervals.java | 2 +- 13 files changed, 271 insertions(+), 33 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index a0a19fa81307..218e493a2316 
100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -20,8 +20,10 @@ import java.io.IOException; import java.util.Objects; -import static org.apache.lucene.search.Intervals.NO_MORE_INTERVALS; - +/** + * A function that takes two interval iterators and combines them to produce a third, + * generally by computing a difference interval between them + */ public abstract class DifferenceIntervalFunction { @Override @@ -33,8 +35,15 @@ public abstract class DifferenceIntervalFunction { @Override public abstract String toString(); + /** + * Combine two interval iterators into a third + */ public abstract IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend); + /** + * Filters the minuend iterator so that only intervals that do not overlap intervals from the + * subtrahend iterator are returned + */ public static final DifferenceIntervalFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { @@ -42,6 +51,10 @@ public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrah } }; + /** + * Filters the minuend iterator so that only intervals that do not contain intervals from the + * subtrahend iterator are returned + */ public static final DifferenceIntervalFunction NOT_CONTAINING = new SingletonFunction("NOT_CONTAINING") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { @@ -49,6 +62,10 @@ public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrah } }; + /** + * Filters the minuend iterator so that only intervals that are not contained by intervals from + * the subtrahend iterator are returned + */ public static final DifferenceIntervalFunction NOT_CONTAINED_BY = new SingletonFunction("NOT_CONTAINED_BY") { @Override public 
IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { @@ -117,11 +134,15 @@ public int nextInterval() throws IOException { } } + /** + * Filters the minuend iterator so that only intervals that do not occur within a set number + * of positions of intervals from the subtrahend iterator are returned + */ public static class NotWithinFunction extends DifferenceIntervalFunction { private final int positions; - public NotWithinFunction(int positions) { + NotWithinFunction(int positions) { this.positions = positions; } diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java index a971f909295f..3874fe9689b1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java @@ -138,7 +138,7 @@ public boolean matches() throws IOException { if (subtrahendScorer.docID() < approximation.docID()) { subtrahendScorer.iterator().advance(approximation.docID()); } - return intervals.reset(approximation.docID()) && intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; + return intervals.reset(approximation.docID()) && intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java index 9cd7f2ba6df4..269689bc43cd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java @@ -18,7 +18,6 @@ package org.apache.lucene.search; import java.io.IOException; -import java.util.List; import org.apache.lucene.util.PriorityQueue; @@ -71,13 +70,13 @@ public int nextInterval() throws IOException { int start = current.start(), end = current.end(); while (queue.size() 
> 0 && contains(queue.top(), start, end)) { IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != Intervals.NO_MORE_INTERVALS) { + if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { queue.add(it); } } if (queue.size() == 0) { current = IntervalIterator.EMPTY; - return Intervals.NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; } current = queue.top(); return current.start(); diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index 5aec1e64ff62..c9e16cf04a7c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -93,7 +93,7 @@ public boolean matches() throws IOException { } freq = -1; intervals.reset(docID()); - return intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; + return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; } @Override @@ -144,7 +144,7 @@ public IntervalIterator intervals(String field) { private void ensureFreq() throws IOException { if (freq == -1) { freq = 1; - while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS) { + while (intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { freq++; } } @@ -198,7 +198,7 @@ public boolean reset(int doc) throws IOException { public int nextInterval() throws IOException { final PostingsAndPosition lead = postings[0]; if (lead.upTo == lead.freq) - return Intervals.NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; lead.pos = lead.postings.nextPosition(); lead.upTo += 1; @@ -212,14 +212,14 @@ public int nextInterval() throws IOException { // advance up to the same position as the lead if (advancePosition(posting, expectedPos) == false) { - return Intervals.NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; } if (posting.pos != expectedPos) { // we advanced too far if 
(advancePosition(lead, posting.pos - posting.offset + lead.offset)) { continue advanceHead; } else { - return Intervals.NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; } } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index 4f33a85861f4..58f6bda3936c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -55,7 +55,7 @@ public final int nextInterval() throws IOException { do { next = in.nextInterval(); } - while (accept() == false && next != Intervals.NO_MORE_INTERVALS); + while (accept() == false && next != IntervalIterator.NO_MORE_INTERVALS); return next; } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 9892caa1c400..0a47ca1483fe 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -18,14 +18,14 @@ package org.apache.lucene.search; import java.io.IOException; -import java.util.Arrays; import java.util.List; import java.util.Objects; import org.apache.lucene.util.PriorityQueue; -import static org.apache.lucene.search.Intervals.NO_MORE_INTERVALS; - +/** + * Combine a list of {@link IntervalIterator}s into another + */ public abstract class IntervalFunction { @Override @@ -37,8 +37,14 @@ public abstract class IntervalFunction { @Override public abstract String toString(); + /** + * Combine the iterators into another iterator + */ public abstract IntervalIterator apply(List iterators); + /** + * Return an iterator over intervals where the subiterators appear in a given order + */ public static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { @Override public IntervalIterator apply(List intervalIterators) { @@ -46,8 +52,17 @@ 
public IntervalIterator apply(List intervalIterators) { } }; + /** + * Return an iterator over intervals where the subiterators appear in a given order, + * filtered by width + */ public static class OrderedNearFunction extends IntervalFunction { + /** + * Create a new OrderedNearFunction + * @param minWidth the minimum width of returned intervals + * @param maxWidth the maximum width of returned intervals + */ public OrderedNearFunction(int minWidth, int maxWidth) { this.minWidth = minWidth; this.maxWidth = maxWidth; @@ -81,7 +96,7 @@ public int hashCode() { } } - public static IntervalIterator orderedIntervalIterator(List subIterators) { + private static IntervalIterator orderedIntervalIterator(List subIterators) { for (IntervalIterator it : subIterators) { if (it == IntervalIterator.EMPTY) return IntervalIterator.EMPTY; @@ -157,7 +172,9 @@ public int nextInterval() throws IOException { } } - + /** + * Return an iterator over intervals where the subiterators appear in any order + */ public static final IntervalFunction UNORDERED = new SingletonFunction("UNORDERED") { @Override public IntervalIterator apply(List intervalIterators) { @@ -165,11 +182,19 @@ public IntervalIterator apply(List intervalIterators) { } }; + /** + * An iterator over intervals where the subiterators appear in any order, within a given width range + */ public static class UnorderedNearFunction extends IntervalFunction { final int minWidth; final int maxWidth; + /** + * Create a new UnorderedNearFunction + * @param minWidth the minimum width of the returned intervals + * @param maxWidth the maximum width of the returned intervals + */ public UnorderedNearFunction(int minWidth, int maxWidth) { this.minWidth = minWidth; this.maxWidth = maxWidth; @@ -201,7 +226,7 @@ public int hashCode() { } } - public static IntervalIterator unorderedIntervalIterator(List subIntervals) { + private static IntervalIterator unorderedIntervalIterator(List subIntervals) { for (IntervalIterator it : subIntervals) { 
if (it == IntervalIterator.EMPTY) return IntervalIterator.EMPTY; @@ -302,6 +327,9 @@ public int nextInterval() throws IOException { } + /** + * Return an iterator over intervals where the first iterator contains intervals from the second + */ public static final IntervalFunction CONTAINING = new SingletonFunction("CONTAINING") { @Override public IntervalIterator apply(List iterators) { @@ -352,6 +380,9 @@ public int nextInterval() throws IOException { } }; + /** + * Return an iterator over intervals where the first iterator is contained by intervals from the second + */ public static final IntervalFunction CONTAINED_BY = new SingletonFunction("CONTAINED_BY") { @Override public IntervalIterator apply(List iterators) { diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index b14211e0eea1..22b6c043ce4f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -19,24 +19,59 @@ import java.io.IOException; -import org.apache.lucene.index.PostingsEnum; - +/** + * Defines methods to iterate over the intervals that a {@link Scorer} matches + * on a document + */ public interface IntervalIterator { + /** + * When returned from {@link #nextInterval()}, indicates that there are no more + * matching intervals on the current document + */ + int NO_MORE_INTERVALS = Integer.MAX_VALUE; + + /** + * The start of the current interval + */ int start(); + /** + * The end of the current interval + */ int end(); + /** + * The width of the current interval + */ int innerWidth(); + /** + * Called to reset the iterator on a new document + * + * @return true if the iterator's parent Scorer is positioned on the given doc id + */ boolean reset(int doc) throws IOException; + /** + * Advance the iterator to the next interval + * + * @return the start of the next interval, or {@link
IntervalIterator#NO_MORE_INTERVALS} if + * there are no more intervals on the current document + */ int nextInterval() throws IOException; + /** + * The score of the current interval + */ default float score() { return (float) (1.0 / (1 + innerWidth())); } + /** + * An empty iterator that always returns {@code false} from {@link #reset(int)} and + * {@link IntervalIterator#NO_MORE_INTERVALS} from {@link #nextInterval()} + */ IntervalIterator EMPTY = new IntervalIterator() { @Override @@ -61,7 +96,7 @@ public boolean reset(int doc) { @Override public int nextInterval() { - return Intervals.NO_MORE_INTERVALS; + return NO_MORE_INTERVALS; } }; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index 2ac445efb4f4..8d15dcf626fa 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -70,7 +70,7 @@ private void ensureFreq() throws IOException { do { freq += intervals.score(); } - while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS); + while (intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS); } } @@ -91,7 +91,7 @@ public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { - return intervals.reset(approximation.docID()) && intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; + return intervals.reset(approximation.docID()) && intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index 43ffb74a9bee..7a4570ae2c73 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -19,56 +19,198 @@ import java.util.Arrays; +/** + * Constructor 
functions for interval-based queries + */ public final class Intervals { - public static final int NO_MORE_INTERVALS = Integer.MAX_VALUE; - + private Intervals() {} + + /** + * Create an ordered query with a maximum width + * + * Matches documents in which the subqueries all match in the given order, and + * in which the width of the interval over which the queries match is less than + * the defined width + * + * @param field the field to query + * @param width the maximum width of subquery-spanning intervals that will match + * @param subQueries an ordered set of subqueries + */ public static Query orderedQuery(String field, int width, Query... subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(0, width)); } + /** + * Create an ordered query with a defined width range + * + * Matches documents in which the subqueries all match in the given order, and in + * which the width of the interval over which the queries match is between the + * minimum and maximum defined widths + * + * @param field the field to query + * @param minWidth the minimum width of subquery-spanning intervals that will match + * @param maxWidth the maximum width of subquery-spanning intervals that will match + * @param subQueries an ordered set of subqueries + */ public static Query orderedQuery(String field, int minWidth, int maxWidth, Query... subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(minWidth, maxWidth)); } + /** + * Create an ordered query with an unbounded width range + * + * Matches documents in which the subqueries all match in the given order + * + * @param field the field to query + * @param subQueries an ordered set of subqueries + */ public static Query orderedQuery(String field, Query... 
subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.ORDERED); } + /** + * Create an unordered query with a maximum width + * + * Matches documents in which the subqueries all match in any order, and in which + * the width of the interval over which the queries match is less than the + * defined width + * + * @param field the field to query + * @param width the maximum width of subquery-spanning intervals that will match + * @param subQueries an unordered set of queries + */ public static Query unorderedQuery(String field, int width, Query... subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(0, width)); } + /** + * Create an unordered query with a defined width range + * + * Matches documents in which the subqueries all match in any order, and in which + * the width of the interval over which the queries match is between the minimum + * and maximum defined widths + * + * @param field the field to query + * @param minWidth the minimum width of subquery-spanning intervals that will match + * @param maxWidth the maximum width of subquery-spanning intervals that will match + * @param subQueries an unordered set of queries + */ public static Query unorderedQuery(String field, int minWidth, int maxWidth, Query... subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(minWidth, maxWidth)); } + /** + * Create an unordered query with an unbounded width range + * + * Matches documents in which all the subqueries match. This is in essence a pure conjunction + * query, but it will expose iterators over those conjunctions that may then be further + * nested in other interval queries + * + * @param field the field to query + * @param subQueries an unordered set of queries + */ public static Query unorderedQuery(String field, Query...
subQueries) { return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.UNORDERED); } + /** + * Create a non-overlapping query + * + * Matches documents that match the minuend query, except when the intervals of the minuend + * query overlap with intervals from the subtrahend query + * + * Exposes matching intervals from the minuend + * + * @param field the field to query + * @param minuend the query to filter + * @param subtrahend the query to filter by + */ public static Query nonOverlappingQuery(String field, Query minuend, Query subtrahend) { return new DifferenceIntervalQuery(field, minuend, subtrahend, DifferenceIntervalFunction.NON_OVERLAPPING); } + /** + * Create a not-within query + * + * Matches documents that match the minuend query, except when the intervals of the minuend + * query appear within a set number of positions of intervals from the subtrahend query + * + * Exposes matching intervals from the minuend + * + * @param field the field to query + * @param minuend the query to filter + * @param positions the maximum distance that intervals from the minuend may occur from intervals + * of the subtrahend + * @param subtrahend the query to filter by + */ public static Query notWithinQuery(String field, Query minuend, int positions, Query subtrahend) { return new DifferenceIntervalQuery(field, minuend, subtrahend, new DifferenceIntervalFunction.NotWithinFunction(positions)); } + /** + * Create a not-containing query + * + * Matches documents that match the minuend query, except when the intervals of the minuend + * query contain an interval of the subtrahend query + * + * Exposes matching intervals from the minuend + * + * @param field the field to query + * @param minuend the query to filter + * @param subtrahend the query to filter by + */ public static Query notContainingQuery(String field, Query minuend, Query subtrahend) { return new DifferenceIntervalQuery(field, minuend, subtrahend,
DifferenceIntervalFunction.NOT_CONTAINING); } + /** + * Create a containing query + * + * Matches documents where intervals of the big query contain one or more intervals from + * the small query + * + * Exposes matching intervals from the big query + * + * @param field the field to query + * @param big the query to filter + * @param small the query to filter by + */ public static Query containingQuery(String field, Query big, Query small) { return new IntervalQuery(field, Arrays.asList(big, small), IntervalFunction.CONTAINING); } + /** + * Create a not-contained-by query + * + * Matches documents that match the small query, except when the intervals of the small + * query are contained within an interval of the big query + * + * Exposes matching intervals from the small query + * + * @param field the field to query + * @param small the query to filter + * @param big the query to filter by + */ public static Query notContainedByQuery(String field, Query small, Query big) { return new DifferenceIntervalQuery(field, small, big, DifferenceIntervalFunction.NOT_CONTAINED_BY); } + /** + * Create a contained-by query + * + * Matches documents where intervals of the small query occur within intervals of the big query + * + * Exposes matching intervals from the small query + * + * @param field the field to query + * @param small the query to filter + * @param big the query to filter by + */ public static Query containedByQuery(String field, Query small, Query big) { return new IntervalQuery(field, Arrays.asList(small, big), IntervalFunction.CONTAINED_BY); } + // TODO: beforeQuery, afterQuery, arbitrary IntervalFunctions + } diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java index 3c05fb194281..9e1d46c8ecfc 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java @@ -125,6 +125,18 @@ public ChildScorer(Scorer child, 
String relationship) { */ public abstract DocIdSetIterator iterator(); + /** + * Return a {@link IntervalIterator} over matching intervals for a given field + * + * Consumers should call {@link IntervalIterator#reset(int)} when the parent + * Scorer's {@link DocIdSetIterator} has moved to a new document, and then + * iterate over the intervals by repeatedly calling {@link IntervalIterator#nextInterval()} + * until {@link IntervalIterator#NO_MORE_INTERVALS} is returned. + * + * @param field The field to retrieve intervals for + * @return an {@link IntervalIterator}, or {@code null} if no intervals are available + * for the given field + */ public abstract IntervalIterator intervals(String field); /** diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index a103af73fa7e..f5374f9a4f69 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -105,7 +105,7 @@ public boolean reset(int doc) throws IOException { @Override public int nextInterval() throws IOException { if (pq.size() < phrasePositions.length) - return Intervals.NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; currentEnd = nextEnd; PhrasePositions pp = pq.pop(); start = pp.realPosition; @@ -143,7 +143,7 @@ public int nextInterval() throws IOException { if (matchLength <= slop) { return start; } - return Intervals.NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; } } @@ -630,7 +630,7 @@ private void ensureFreq() throws IOException { if (sloppyFreq == -1) { numMatches = 1; sloppyFreq = intervals.score(); - while (intervals.nextInterval() != Intervals.NO_MORE_INTERVALS) { + while (intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { sloppyFreq += intervals.score(); numMatches++; } @@ -670,7 +670,7 @@ public TwoPhaseIterator twoPhaseIterator() { public boolean 
matches() throws IOException { sloppyFreq = -1; intervals.reset(docID()); - return intervals.nextInterval() != Intervals.NO_MORE_INTERVALS; + return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index ef9eeeabc026..79f00d30cb57 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -24,8 +24,6 @@ import org.apache.lucene.index.SlowImpactsEnum; import org.apache.lucene.index.TermsEnum; -import static org.apache.lucene.search.Intervals.NO_MORE_INTERVALS; - /** Expert: A Scorer for documents matching a Term. */ final class TermScorer extends Scorer { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 291494e9787f..1dd032007310 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -98,7 +98,7 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i int id = (int) ids.longValue(); if (intervals.reset(doc)) { int i = 0, pos; - while ((pos = intervals.nextInterval()) != Intervals.NO_MORE_INTERVALS) { + while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) { System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); From b0aa58064472d4f20286254c7d9d2e3359d039b0 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 1 Mar 2018 17:01:13 +0000 Subject: [PATCH 57/83] cleanups --- .../search/BlockMaxConjunctionScorer.java | 2 +- .../apache/lucene/search/BooleanQuery.java | 3 +- .../lucene/search/CachedIntervalIterator.java | 6 ++ .../search/DifferenceIntervalQuery.java | 13 
+++- .../org/apache/lucene/search/DisiWrapper.java | 1 - .../search/DisjunctionIntervalIterator.java | 6 ++ .../lucene/search/ExactPhraseScorer.java | 48 ------------ .../apache/lucene/search/IndexSearcher.java | 3 +- .../apache/lucene/search/IntervalFilter.java | 17 ++++ .../apache/lucene/search/IntervalQuery.java | 16 ++-- .../org/apache/lucene/search/Intervals.java | 5 ++ .../org/apache/lucene/search/ScoreMode.java | 14 ++-- .../lucene/search/SloppyPhraseScorer.java | 77 +++++-------------- .../org/apache/lucene/search/WANDScorer.java | 2 +- .../java/org/apache/lucene/search/Weight.java | 2 +- .../apache/lucene/search/TestIntervals.java | 2 +- 16 files changed, 90 insertions(+), 127 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java index 2c625643ef15..02f4a0f88dda 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java @@ -242,6 +242,6 @@ public Collection getChildren() { @Override public IntervalIterator intervals(String field) { - return null; // nocommit + throw new UnsupportedOperationException(); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index ee89ca11de28..bfe00e2e4bf4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -33,6 +33,7 @@ import java.util.function.Predicate; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.search.BooleanClause.Occur; /** A Query that matches documents matching boolean combinations of other @@ -201,7 +202,7 @@ private BooleanQuery rewriteNoScoring() { @Override public Weight createWeight(IndexSearcher searcher, 
ScoreMode scoreMode, float boost) throws IOException { BooleanQuery query = this; - if (scoreMode.needsScores() == false && scoreMode.needsPositions() == false) { + if (scoreMode.minRequiredPostings() == PostingsEnum.NONE) { query = rewriteNoScoring(); } return new BooleanWeight(query, searcher, scoreMode, boost); diff --git a/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java index 5e5f68a365ea..5501cffae316 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java @@ -19,6 +19,12 @@ import java.io.IOException; +/** + * An interval iterator which caches its first invocation. + * + * Useful for two-phase queries that confirm matches by checking that at least one + * interval exists in a given document + */ class CachedIntervalIterator extends FilterIntervalIterator { final Scorer scorer; diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java index 3874fe9689b1..b175fbac7fc8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java @@ -27,6 +27,10 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.Similarity; +/** + * A query that retrieves documents containing intervals returned from a + * {@link DifferenceIntervalFunction} over a minuend query and a subtrahend query + */ public class DifferenceIntervalQuery extends Query { private final Query minuend; @@ -34,7 +38,14 @@ public class DifferenceIntervalQuery extends Query { private final DifferenceIntervalFunction function; private final String field; - protected DifferenceIntervalQuery(String field, Query minuend, Query subtrahend, DifferenceIntervalFunction 
function) { + /** + * Create a new DifferenceIntervalQuery + * @param field the field to query + * @param minuend the subquery to filter + * @param subtrahend the subquery to filter by + * @param function a {@link DifferenceIntervalFunction} to combine the minuend and subtrahend + */ + public DifferenceIntervalQuery(String field, Query minuend, Query subtrahend, DifferenceIntervalFunction function) { this.minuend = minuend; this.subtrahend = subtrahend; this.function = function; diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index 0a581804aaa3..fac9418010f4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -81,6 +81,5 @@ public DisiWrapper(Spans spans) { this.lastApproxNonMatchDoc = -2; this.lastApproxMatchDoc = -2; } - } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java index 269689bc43cd..ce3ff22b41b2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java @@ -21,6 +21,9 @@ import org.apache.lucene.util.PriorityQueue; +/** + * Implements the minimum-interval OR algorithm + */ abstract class DisjunctionIntervalIterator implements IntervalIterator { protected final PriorityQueue queue; @@ -51,6 +54,9 @@ public int innerWidth() { return current.innerWidth(); } + /** + * Called to repopulate the interval priority queue when moving to a new document + */ protected abstract void fillQueue(int doc) throws IOException; @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index c9e16cf04a7c..c69f175505fc 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -228,52 +228,4 @@ public int nextInterval() throws IOException { } } - private int phraseFreq() throws IOException { - // reset state - final PostingsAndPosition[] postings = this.postings; - for (PostingsAndPosition posting : postings) { - posting.freq = posting.postings.freq(); - posting.pos = posting.postings.nextPosition(); - posting.upTo = 1; - } - - int freq = 0; - final PostingsAndPosition lead = postings[0]; - - advanceHead: - while (true) { - final int phrasePos = lead.pos - lead.offset; - for (int j = 1; j < postings.length; ++j) { - final PostingsAndPosition posting = postings[j]; - final int expectedPos = phrasePos + posting.offset; - - // advance up to the same position as the lead - if (advancePosition(posting, expectedPos) == false) { - break advanceHead; - } - - if (posting.pos != expectedPos) { // we advanced too far - if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) { - continue advanceHead; - } else { - break advanceHead; - } - } - } - - freq += 1; - if (needsScores == false) { - break; - } - - if (lead.upTo == lead.freq) { - break; - } - lead.pos = lead.postings.nextPosition(); - lead.upTo += 1; - } - - return this.freq = freq; - } - } diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index 5ab5adec1a84..9011183f7aca 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -36,6 +36,7 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.StoredFieldVisitor; import 
org.apache.lucene.index.Term; @@ -697,7 +698,7 @@ public Weight createNormalizedWeight(Query query, ScoreMode scoreMode) throws IO public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException { final QueryCache queryCache = this.queryCache; Weight weight = query.createWeight(this, scoreMode, boost); - if (scoreMode.needsPositions() == false && queryCache != null) { + if (scoreMode.minRequiredPostings() == PostingsEnum.NONE && queryCache != null) { weight = queryCache.doCache(weight, queryCachingPolicy); } return weight; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index 58f6bda3936c..deb842cb1a49 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -19,8 +19,15 @@ import java.io.IOException; +/** + * Wraps an {@link IntervalIterator} and passes through those intervals that match the {@link #accept()} function + */ public abstract class IntervalFilter implements IntervalIterator { + /** + * Filter an {@link IntervalIterator} by its outer width, ie the distance between the + * start and end of the iterator + */ public static IntervalIterator widthFilter(IntervalIterator in, int minWidth, int maxWidth) { return new IntervalFilter(in) { @Override @@ -31,6 +38,10 @@ protected boolean accept() { }; } + /** + * Filter an {@link IntervalIterator} by its inner width, ie the distance between the + * end of its first subiterator and the beginning of its last + */ public static IntervalIterator innerWidthFilter(IntervalIterator in, int minWidth, int maxWidth) { return new IntervalFilter(in) { @Override @@ -43,10 +54,16 @@ protected boolean accept() { private final IntervalIterator in; + /** + * Create a new filter + */ public IntervalFilter(IntervalIterator in) { this.in = in; } + /** + * @return {@code true} if the wrapped iterator's interval 
should be passed on + */ protected abstract boolean accept(); @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index dfab7da1e3c7..3c2683cb6046 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -31,17 +31,23 @@ import org.apache.lucene.index.TermStates; import org.apache.lucene.search.similarities.Similarity; +/** + * A query that retrieves documents containing intervals returned from an + * {@link IntervalFunction} over a set of subqueries + */ public final class IntervalQuery extends Query { private final String field; private final List subQueries; private final IntervalFunction iteratorFunction; - protected IntervalQuery(String field, List subQueries, IntervalFunction iteratorFunction) { - this(field, subQueries, null, iteratorFunction); - } - - protected IntervalQuery(String field, List subQueries, Query subtrahend, IntervalFunction iteratorFunction) { + /** + * Create a new IntervalQuery + * @param field the field to query + * @param subQueries the subqueries to generate intervals from + * @param iteratorFunction an {@link IntervalFunction} to combine the intervals from the subqueries + */ + public IntervalQuery(String field, List subQueries, IntervalFunction iteratorFunction) { this.field = field; this.subQueries = subQueries; this.iteratorFunction = iteratorFunction; diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index 7a4570ae2c73..bc0455289240 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -21,6 +21,11 @@ /** * Constructor functions for interval-based queries + * + * These queries use {@link IntervalFunction} or {@link DifferenceIntervalFunction} + * classes, 
implementing minimum-interval algorithms taken from the paper + * + * Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics */ public final class Intervals { diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java index ea3ed9f3648b..815286adddc9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java @@ -54,17 +54,15 @@ public int minRequiredPostings() { } }, + /** + * Produced scorers will allow visiting all matches, and expose positions + */ COMPLETE_POSITIONS { @Override public boolean needsScores() { return false; } - @Override - public boolean needsPositions() { - return true; - } - @Override public int minRequiredPostings() { return PostingsEnum.POSITIONS; @@ -92,10 +90,10 @@ public int minRequiredPostings() { */ public abstract boolean needsScores(); + /** + * The minimum flags to be passed to {@link org.apache.lucene.index.TermsEnum#postings(PostingsEnum, int)} + */ public abstract int minRequiredPostings(); - public boolean needsPositions() { - return needsScores(); - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index f5374f9a4f69..de9546be3938 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -147,63 +147,6 @@ public int nextInterval() throws IOException { } } - /** - * Score a candidate doc for all slop-valid position-combinations (matches) - * encountered while traversing/hopping the PhrasePositions. - *
    The score contribution of a match depends on the distance: - *
    - highest score for distance=0 (exact match). - *
    - score gets lower as distance gets higher. - *
    Example: for query "a b"~2, a document "x a b a y" can be scored twice: - * once for "a b" (distance=0), and once for "b a" (distance=2). - *
    Possibly not all valid combinations are encountered, because for efficiency - * we always propagate the least PhrasePosition. This allows to base on - * PriorityQueue and move forward faster. - * As result, for example, document "a b c b a" - * would score differently for queries "a b c"~4 and "c b a"~4, although - * they really are equivalent. - * Similarly, for doc "a b c b a f g", query "c b"~2 - * would get same score as "g f"~2, although "c b"~2 could be matched twice. - * We may want to fix this in the future (currently not, for performance reasons). - */ - private float phraseFreq() throws IOException { - if (!initPhrasePositions()) { - return 0.0f; - } - float freq = 0.0f; - numMatches = 0; - PhrasePositions pp = pq.pop(); - int matchLength = end - pp.position; - int next = pq.top().position; - while (advancePP(pp)) { - if (hasRpts && !advanceRpts(pp)) { - break; // pps exhausted - } - if (pp.position > next) { // done minimizing current match-length - if (matchLength <= slop) { - freq += (1.0 / (1.0 + matchLength)); // score match - numMatches++; - if (!needsScores) { - return freq; - } - } - pq.add(pp); - pp = pq.pop(); - next = pq.top().position; - matchLength = end - pp.position; - } else { - int matchLength2 = end - pp.position; - if (matchLength2 < matchLength) { - matchLength = matchLength2; - } - } - } - if (matchLength <= slop) { - freq += (1.0 / (1.0 + matchLength)); // score match - numMatches++; - } - return freq; - } - /** advance a PhrasePosition and update 'end', return false if exhausted */ private boolean advancePP(PhrasePositions pp) throws IOException { if (!pp.nextPosition()) { @@ -625,7 +568,25 @@ float sloppyFreq() throws IOException { // } // } // } - + + /** + * Score a candidate doc for all slop-valid position-combinations (matches) + * encountered while traversing/hopping the PhrasePositions. + *
    The score contribution of a match depends on the distance: + *
    - highest score for distance=0 (exact match). + *
    - score gets lower as distance gets higher. + *
    Example: for query "a b"~2, a document "x a b a y" can be scored twice: + * once for "a b" (distance=0), and once for "b a" (distance=2). + *
    Possibly not all valid combinations are encountered, because for efficiency + * we always propagate the least PhrasePosition. This allows to base on + * PriorityQueue and move forward faster. + * As result, for example, document "a b c b a" + * would score differently for queries "a b c"~4 and "c b a"~4, although + * they really are equivalent. + * Similarly, for doc "a b c b a f g", query "c b"~2 + * would get same score as "g f"~2, although "c b"~2 could be matched twice. + * We may want to fix this in the future (currently not, for performance reasons). + */ private void ensureFreq() throws IOException { if (sloppyFreq == -1) { numMatches = 1; diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java index f39380b5cd88..f4ef706fcf1d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java @@ -442,7 +442,7 @@ private void advanceAllTail() throws IOException { @Override public IntervalIterator intervals(String field) { - return null; // nocommit + throw new UnsupportedOperationException(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 244056313f78..3106e1962a59 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -44,7 +44,7 @@ * A Weight is used in the following way: *

      *
    1. A Weight is constructed by a top-level query, given a - * IndexSearcher ({@link Query#createWeight(IndexSearcher, ScoreMode, org.apache.lucene.search.Query.Postings, float)}). + * IndexSearcher ({@link Query#createWeight(IndexSearcher, ScoreMode, float)}). *
    2. A Scorer is constructed by * {@link #scorer(org.apache.lucene.index.LeafReaderContext)}. *
    diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 1dd032007310..7711f13509c8 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -99,7 +99,7 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i if (intervals.reset(doc)) { int i = 0, pos; while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) { - System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); + //System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); assertEquals(expected[id][i + 1], intervals.end()); From 19b8ab1a79b46c06588bf9001b59d8e935164af8 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 1 Mar 2018 17:12:02 +0000 Subject: [PATCH 58/83] Expose intervals from SpanScorer --- .../lucene/search/spans/SpanScorer.java | 36 +++++++++++++++++-- .../lucene/search/spans/SpanWeight.java | 2 +- .../apache/lucene/search/TestIntervals.java | 19 +++++++++- .../queries/payloads/PayloadScoreQuery.java | 6 ++-- .../payloads/SpanPayloadCheckQuery.java | 2 +- 5 files changed, 57 insertions(+), 8 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java index 5fe3fd8dbbf8..2ec0c5d2b372 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -32,6 +32,7 @@ */ public class SpanScorer extends Scorer { + protected final String field; protected final Spans spans; protected final LeafSimScorer docScorer; @@ -42,10 +43,11 @@ public class SpanScorer extends Scorer { private int lastScoredDoc = -1; // last doc we called 
setFreqCurrentDoc() for /** Sole constructor. */ - public SpanScorer(SpanWeight weight, Spans spans, LeafSimScorer docScorer) { + public SpanScorer(SpanWeight weight, String field, Spans spans, LeafSimScorer docScorer) { super(weight); this.spans = Objects.requireNonNull(spans); this.docScorer = docScorer; + this.field = field; } /** return the Spans for this Scorer **/ @@ -60,7 +62,9 @@ public int docID() { @Override public IntervalIterator intervals(String field) { - return null; // nocommit + if (this.field.equals(field)) + return new SpanIntervalIterator(); + return null; } @Override @@ -152,4 +156,32 @@ final float sloppyFreq() throws IOException { return freq; } + private class SpanIntervalIterator implements IntervalIterator { + + @Override + public int start() { + return spans.startPosition(); + } + + @Override + public int end() { + return spans.endPosition() - 1; + } + + @Override + public int innerWidth() { + return spans.width(); + } + + @Override + public boolean reset(int doc) throws IOException { + return spans.docID() == doc; + } + + @Override + public int nextInterval() throws IOException { + return spans.nextStartPosition(); + } + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index 25b58fdc39a0..f19ca742076f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -130,7 +130,7 @@ public SpanScorer scorer(LeafReaderContext context) throws IOException { return null; } final LeafSimScorer docScorer = getSimScorer(context); - return new SpanScorer(this, spans, docScorer); + return new SpanScorer(this, field, spans, docScorer); } /** diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 7711f13509c8..339b4f657f62 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -31,6 +31,9 @@ import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; @@ -99,7 +102,7 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i if (intervals.reset(doc)) { int i = 0, pos; while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) { - //System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); + System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); assertEquals(expected[id][i + 1], intervals.end()); @@ -275,4 +278,18 @@ public void testMinimumShouldMatch() throws IOException { {} }); } + + public void testSpanNearQueryEquivalence() throws IOException { + checkIntervals(new SpanNearQuery(new SpanQuery[]{ + new SpanTermQuery(new Term("field1", "pease")), + new SpanTermQuery(new Term("field1", "hot"))}, 100, true), + "field1", 3, new int[][]{ + {}, + {0, 2, 3, 17, 6, 17}, + {0, 5, 3, 5, 6, 21}, + {}, + { 0, 2, 3, 17, 6, 17 }, + { } + }); + } } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java index bd5d927c6275..5b5d5812559b 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java @@ -151,7 +151,7 @@ public SpanScorer 
scorer(LeafReaderContext context) throws IOException { return null; LeafSimScorer docScorer = innerWeight.getSimScorer(context); PayloadSpans payloadSpans = new PayloadSpans(spans, decoder); - return new PayloadSpanScorer(this, payloadSpans, docScorer); + return new PayloadSpanScorer(this, field, payloadSpans, docScorer); } @Override @@ -227,8 +227,8 @@ private class PayloadSpanScorer extends SpanScorer { private final PayloadSpans spans; - private PayloadSpanScorer(SpanWeight weight, PayloadSpans spans, LeafSimScorer docScorer) throws IOException { - super(weight, spans, docScorer); + private PayloadSpanScorer(SpanWeight weight, String field, PayloadSpans spans, LeafSimScorer docScorer) throws IOException { + super(weight, field, spans, docScorer); this.spans = spans; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java index a9d3bfb2da9a..dbee623c59b6 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java @@ -128,7 +128,7 @@ public SpanScorer scorer(LeafReaderContext context) throws IOException { return null; } final LeafSimScorer docScorer = getSimScorer(context); - return new SpanScorer(this, spans, docScorer); + return new SpanScorer(this, field, spans, docScorer); } @Override From d2cbd1955bdf2aa41f2d4fa593afb456ccb6aac7 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 6 Mar 2018 01:50:23 +0000 Subject: [PATCH 59/83] IntervalsSource --- .../java/org/apache/lucene/index/Sorter.java | 25 +++ .../search/BlockMaxConjunctionScorer.java | 4 - .../apache/lucene/search/BooleanQuery.java | 3 +- .../lucene/search/CachingCollector.java | 5 - .../search/ConjunctionIntervalIterator.java | 66 ++++++ .../search/ConjunctionIntervalsSource.java | 77 +++++++ .../lucene/search/ConjunctionScorer.java | 30 
+-- .../lucene/search/ConstantScoreScorer.java | 5 - .../search/DifferenceIntervalFunction.java | 27 ++- .../search/DifferenceIntervalQuery.java | 169 --------------- .../search/DifferenceIntervalsSource.java | 74 +++++++ .../org/apache/lucene/search/DisiWrapper.java | 18 ++ .../search/DisjunctionIntervalIterator.java | 95 -------- .../search/DisjunctionIntervalsSource.java | 175 +++++++++++++++ .../lucene/search/DisjunctionScorer.java | 34 +-- .../lucene/search/ExactPhraseScorer.java | 118 ++++------ .../org/apache/lucene/search/FakeScorer.java | 5 - .../lucene/search/FilterIntervalIterator.java | 14 +- .../apache/lucene/search/FilterScorer.java | 5 - .../apache/lucene/search/IndexSearcher.java | 3 +- .../apache/lucene/search/IntervalFilter.java | 25 +-- .../lucene/search/IntervalFunction.java | 77 +++---- .../lucene/search/IntervalIterator.java | 43 +--- .../apache/lucene/search/IntervalQuery.java | 63 ++---- .../apache/lucene/search/IntervalScorer.java | 22 +- .../org/apache/lucene/search/Intervals.java | 202 +++++++++--------- ...rvalIterator.java => IntervalsSource.java} | 36 +--- .../search/MinShouldMatchSumScorer.java | 28 --- .../lucene/search/MultiPhraseQuery.java | 8 +- .../apache/lucene/search/PhrasePositions.java | 6 +- .../org/apache/lucene/search/PhraseQuery.java | 8 +- .../apache/lucene/search/ReqExclScorer.java | 12 -- .../apache/lucene/search/ReqOptSumScorer.java | 35 +-- .../org/apache/lucene/search/ScoreMode.java | 39 ---- .../java/org/apache/lucene/search/Scorer.java | 15 -- .../lucene/search/SloppyPhraseScorer.java | 176 +++++---------- .../apache/lucene/search/SynonymQuery.java | 3 +- .../lucene/search/TermIntervalsSource.java | 127 +++++++++++ .../org/apache/lucene/search/TermQuery.java | 4 +- .../org/apache/lucene/search/TermScorer.java | 69 +----- .../org/apache/lucene/search/WANDScorer.java | 5 - .../java/org/apache/lucene/search/Weight.java | 1 - .../lucene/search/spans/SpanScorer.java | 40 +--- .../lucene/search/spans/SpanWeight.java | 2 
+- .../lucene/search/JustCompileSearch.java | 5 - .../search/TestBoolean2ScorerSupplier.java | 5 - .../lucene/search/TestCachingCollector.java | 5 - .../lucene/search/TestConjunctionDISI.java | 5 - .../lucene/search/TestIntervalQuery.java | 80 +++---- .../apache/lucene/search/TestIntervals.java | 148 ++----------- .../lucene/search/TestMinShouldMatch2.java | 5 - .../TestPositiveScoresOnlyCollector.java | 5 - .../lucene/search/TestQueryRescorer.java | 5 - .../TestScoreCachingWrappingScorer.java | 5 - .../lucene/search/TestTopDocsCollector.java | 5 - .../lucene/search/TestTopFieldCollector.java | 7 +- .../queries/payloads/PayloadScoreQuery.java | 6 +- .../payloads/SpanPayloadCheckQuery.java | 2 +- .../apache/lucene/search/AssertingScorer.java | 5 - .../lucene/search/BlockScoreQueryWrapper.java | 4 - .../search/BulkScorerWrapperScorer.java | 5 - .../search/RandomApproximationQuery.java | 5 - 62 files changed, 939 insertions(+), 1366 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalsSource.java delete mode 100644 lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalsSource.java delete mode 100644 lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java rename lucene/core/src/java/org/apache/lucene/search/{CachedIntervalIterator.java => IntervalsSource.java} (54%) create mode 100644 lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java diff --git a/lucene/core/src/java/org/apache/lucene/index/Sorter.java b/lucene/core/src/java/org/apache/lucene/index/Sorter.java index a081ea7aaf3b..c47f9a118abb 100644 --- a/lucene/core/src/java/org/apache/lucene/index/Sorter.java +++ 
b/lucene/core/src/java/org/apache/lucene/index/Sorter.java @@ -445,5 +445,30 @@ public String getID() { public String toString() { return getID(); } + + static final Scorer FAKESCORER = new Scorer(null) { + + float score; + int doc = -1; + + @Override + public int docID() { + return doc; + } + + public DocIdSetIterator iterator() { + throw new UnsupportedOperationException(); + } + + @Override + public float score() throws IOException { + return score; + } + + @Override + public float getMaxScore(int upTo) throws IOException { + return Float.POSITIVE_INFINITY; + } + }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java index 02f4a0f88dda..070b6c40f025 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java @@ -240,8 +240,4 @@ public Collection getChildren() { return children; } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index bfe00e2e4bf4..f52df9fb9cd8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -33,7 +33,6 @@ import java.util.function.Predicate; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.search.BooleanClause.Occur; /** A Query that matches documents matching boolean combinations of other @@ -202,7 +201,7 @@ private BooleanQuery rewriteNoScoring() { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { BooleanQuery query = this; - if (scoreMode.minRequiredPostings() == 
PostingsEnum.NONE) { + if (scoreMode.needsScores() == false) { query = rewriteNoScoring(); } return new BooleanWeight(query, searcher, scoreMode, boost); diff --git a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java index ae705455ecfa..3bed88dd9980 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java @@ -64,11 +64,6 @@ public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } - @Override public final float score() { return score; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java new file mode 100644 index 000000000000..53139fe42ec8 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +abstract class ConjunctionIntervalIterator implements IntervalIterator { + + protected final List subIterators; + + final DocIdSetIterator approximation; + final float cost; + + ConjunctionIntervalIterator(List subIterators) { + this.subIterators = subIterators; + float costsum = 0; + List approximations = new ArrayList<>(); + for (IntervalIterator it : subIterators) { + costsum += it.cost(); + approximations.add(it.approximation()); + } + this.cost = costsum; + this.approximation = ConjunctionDISI.intersectIterators(approximations); + + } + + @Override + public final DocIdSetIterator approximation() { + return approximation; + } + + @Override + public final boolean advanceTo(int doc) throws IOException { + for (IntervalIterator it : subIterators) { + if (it.advanceTo(doc) == false) + return false; + } + reset(); + return true; + } + + protected abstract void reset() throws IOException; + + @Override + public final float cost() { + return cost; + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalsSource.java new file mode 100644 index 000000000000..2ee0422786c9 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalsSource.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; + +class ConjunctionIntervalsSource extends IntervalsSource { + + final List subSources; + final IntervalFunction function; + + ConjunctionIntervalsSource(List subSources, IntervalFunction function) { + this.subSources = subSources; + this.function = function; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ConjunctionIntervalsSource that = (ConjunctionIntervalsSource) o; + return Objects.equals(subSources, that.subSources) && + Objects.equals(function, that.function); + } + + @Override + public String toString() { + return function + subSources.stream().map(Object::toString).collect(Collectors.joining(",", "(", ")")); + } + + @Override + public void extractTerms(String field, Set terms) { + for (IntervalsSource source : subSources) { + source.extractTerms(field, terms); + } + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + List subIntervals = new ArrayList<>(); + for (IntervalsSource source : subSources) { + IntervalIterator it = source.intervals(field, ctx); + if (it == null) + return null; + subIntervals.add(it); + } + return function.apply(subIntervals); + } + + @Override + public int 
hashCode() { + return Objects.hash(subSources, function); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java index 93e03f3c4177..7a1b9563721b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; -import java.util.List; /** Scorer for conjunctions, sets of queries, all of which are required. */ class ConjunctionScorer extends Scorer { @@ -102,27 +101,14 @@ public Collection getChildren() { return children; } - @Override - public IntervalIterator intervals(String field) { - List subIntervals = new ArrayList<>(); - for (Scorer scorer : required) { - IntervalIterator it = scorer.intervals(field); - if (it != null) { - subIntervals.add(it); - } - } - if (subIntervals.size() == 0) { - return null; + static final class DocsAndFreqs { + final long cost; + final DocIdSetIterator iterator; + int doc = -1; + + DocsAndFreqs(DocIdSetIterator iterator) { + this.iterator = iterator; + this.cost = iterator.cost(); } - return new DisjunctionIntervalIterator(subIntervals.size()) { - @Override - protected void fillQueue(int doc) throws IOException { - for (IntervalIterator it : subIntervals) { - it.reset(doc); - queue.add(it); - } - } - }; } - } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java index 0040374b6147..45a6bdbad041 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java @@ -63,11 +63,6 @@ public DocIdSetIterator iterator() { return disi; } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } - 
@Override public TwoPhaseIterator twoPhaseIterator() { return twoPhaseIterator; diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index 218e493a2316..2ee7d36d78ac 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -101,13 +101,22 @@ public int innerWidth() { } @Override - public boolean reset(int doc) throws IOException { - bpos = b.reset(doc); + public boolean advanceTo(int doc) throws IOException { + bpos = b.advanceTo(doc); if (bpos) bpos = b.nextInterval() != NO_MORE_INTERVALS; - return a.reset(doc); + return a.advanceTo(doc); } + @Override + public DocIdSetIterator approximation() { + return a.approximation(); + } + + @Override + public float cost() { + return a.cost() + b.cost(); + } } private static class NonOverlappingIterator extends RelativeIterator { @@ -166,7 +175,7 @@ public int hashCode() { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - IntervalIterator notWithin = new IntervalIterator() { + IntervalIterator notWithin = new FilterIntervalIterator(subtrahend) { @Override public int start() { int start = subtrahend.start(); @@ -186,16 +195,6 @@ public int end() { public int innerWidth() { throw new UnsupportedOperationException(); } - - @Override - public boolean reset(int doc) throws IOException { - return subtrahend.reset(doc); - } - - @Override - public int nextInterval() throws IOException { - return subtrahend.nextInterval(); - } }; return NON_OVERLAPPING.apply(minuend, notWithin); } diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java deleted file mode 100644 index b175fbac7fc8..000000000000 --- 
a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalQuery.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.search; - -import java.io.IOException; -import java.util.Collections; -import java.util.Objects; -import java.util.Set; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.similarities.Similarity; - -/** - * A query that retrieves documents containing intervals returned from a - * {@link DifferenceIntervalFunction} over a minuend query and a subtrahend query - */ -public class DifferenceIntervalQuery extends Query { - - private final Query minuend; - private final Query subtrahend; - private final DifferenceIntervalFunction function; - private final String field; - - /** - * Create a new DifferenceIntervalQuery - * @param field the field to query - * @param minuend the subquery to filter - * @param subtrahend the subquery to filter by - * @param function a {@link DifferenceIntervalFunction} to combine the minuend and subtrahend - */ - public DifferenceIntervalQuery(String field, Query minuend, Query subtrahend, 
DifferenceIntervalFunction function) { - this.minuend = minuend; - this.subtrahend = subtrahend; - this.function = function; - this.field = field; - } - - @Override - public String toString(String field) { - return function + "(" + minuend + ", " + subtrahend + ")"; - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - Weight minuendWeight = searcher.createWeight(minuend, ScoreMode.COMPLETE_POSITIONS, 1); - Weight subtrahendWeight = searcher.createWeight(subtrahend, ScoreMode.COMPLETE_POSITIONS, 1); - return new IntervalDifferenceWeight(minuendWeight, subtrahendWeight, scoreMode, - searcher.getSimilarity(), IntervalQuery.buildSimScorer(field, searcher, Collections.singletonList(minuendWeight), boost)); - } - - @Override - public Query rewrite(IndexReader reader) throws IOException { - Query rewrittenMinuend = minuend.rewrite(reader); - Query rewrittenSubtrahend = subtrahend.rewrite(reader); - if (rewrittenMinuend != minuend || rewrittenSubtrahend != subtrahend) { - return new DifferenceIntervalQuery(field, rewrittenMinuend, rewrittenSubtrahend, function); - } - return this; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - DifferenceIntervalQuery that = (DifferenceIntervalQuery) o; - return Objects.equals(minuend, that.minuend) && - Objects.equals(subtrahend, that.subtrahend) && - Objects.equals(function, that.function); - } - - @Override - public int hashCode() { - return Objects.hash(minuend, subtrahend, function); - } - - private class IntervalDifferenceWeight extends Weight { - - final Weight minuendWeight; - final Weight subtrahendWeight; - final ScoreMode scoreMode; - final Similarity similarity; - final Similarity.SimScorer simScorer; - - private IntervalDifferenceWeight(Weight minuendWeight, Weight subtrahendWeight, ScoreMode scoreMode, - Similarity similarity, Similarity.SimScorer 
simScorer) { - super(DifferenceIntervalQuery.this); - this.minuendWeight = minuendWeight; - this.subtrahendWeight = subtrahendWeight; - this.scoreMode = scoreMode; - this.similarity = similarity; - this.simScorer = simScorer; - } - - @Override - public void extractTerms(Set terms) { - this.minuendWeight.extractTerms(terms); - } - - @Override - public Explanation explain(LeafReaderContext context, int doc) throws IOException { - IntervalScorer scorer = (IntervalScorer) scorer(context); - if (scorer != null) { - int newDoc = scorer.iterator().advance(doc); - if (newDoc == doc) { - return scorer.explain("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "]"); - } - } - return Explanation.noMatch("no matching intervals"); - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - Scorer minuendScorer = minuendWeight.scorer(context); - Scorer subtrahendScorer = subtrahendWeight.scorer(context); - if (subtrahendScorer == null || minuendScorer == null) - return minuendScorer; - - IntervalIterator minuendIt = minuendScorer.intervals(field); - IntervalIterator subtrahendIt = subtrahendScorer.intervals(field); - if (subtrahendIt == IntervalIterator.EMPTY || subtrahendIt == null) - return minuendScorer; - - LeafSimScorer leafScorer = simScorer == null ? 
null - : new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE); - - return new IntervalScorer(this, field, minuendScorer.iterator(), function.apply(minuendIt, subtrahendIt), leafScorer){ - @Override - public TwoPhaseIterator twoPhaseIterator() { - return new TwoPhaseIterator(approximation) { - @Override - public boolean matches() throws IOException { - if (subtrahendScorer.docID() < approximation.docID()) { - subtrahendScorer.iterator().advance(approximation.docID()); - } - return intervals.reset(approximation.docID()) && intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; - } - - @Override - public float matchCost() { - return 0; - } - }; - } - }; - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - return minuendWeight.isCacheable(ctx) && subtrahendWeight.isCacheable(ctx); - } - } -} diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalsSource.java new file mode 100644 index 000000000000..d26217729026 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalsSource.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Objects; +import java.util.Set; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; + +class DifferenceIntervalsSource extends IntervalsSource { + + final IntervalsSource minuend; + final IntervalsSource subtrahend; + final DifferenceIntervalFunction function; + + public DifferenceIntervalsSource(IntervalsSource minuend, IntervalsSource subtrahend, DifferenceIntervalFunction function) { + this.minuend = minuend; + this.subtrahend = subtrahend; + this.function = function; + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + IntervalIterator minIt = minuend.intervals(field, ctx); + if (minIt == null) + return null; + IntervalIterator subIt = subtrahend.intervals(field, ctx); + if (subIt == null) + return minIt; + return function.apply(minIt, subIt); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DifferenceIntervalsSource that = (DifferenceIntervalsSource) o; + return Objects.equals(minuend, that.minuend) && + Objects.equals(subtrahend, that.subtrahend) && + Objects.equals(function, that.function); + } + + @Override + public int hashCode() { + return Objects.hash(minuend, subtrahend, function); + } + + @Override + public String toString() { + return function + "(" + minuend + ", " + subtrahend + ")"; + } + + @Override + public void extractTerms(String field, Set terms) { + minuend.extractTerms(field, terms); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index fac9418010f4..aba5dff908a6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -46,9 +46,14 @@ public class DisiWrapper { public int lastApproxMatchDoc; // last doc of approximation that did match public int lastApproxNonMatchDoc; // last doc of approximation that did not match + // For IntervalIterators + // TODO clean this up! + public final IntervalIterator intervals; + public DisiWrapper(Scorer scorer) { this.scorer = scorer; this.spans = null; + this.intervals = null; this.iterator = scorer.iterator(); this.cost = iterator.cost(); this.doc = -1; @@ -66,6 +71,7 @@ public DisiWrapper(Scorer scorer) { public DisiWrapper(Spans spans) { this.scorer = null; this.spans = spans; + this.intervals = null; this.iterator = spans; this.cost = iterator.cost(); this.doc = -1; @@ -81,5 +87,17 @@ public DisiWrapper(Spans spans) { this.lastApproxNonMatchDoc = -2; this.lastApproxMatchDoc = -2; } + + public DisiWrapper(IntervalIterator iterator) { + this.scorer = null; + this.spans = null; + this.intervals = iterator; + this.iterator = iterator.approximation(); + this.cost = iterator.approximation().cost(); + this.doc = -1; + this.twoPhaseView = null; + this.approximation = iterator.approximation(); + this.matchCost = iterator.cost(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java deleted file mode 100644 index ce3ff22b41b2..000000000000 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalIterator.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.search; - -import java.io.IOException; - -import org.apache.lucene.util.PriorityQueue; - -/** - * Implements the minimum-interval OR algorithm - */ -abstract class DisjunctionIntervalIterator implements IntervalIterator { - - protected final PriorityQueue queue; - - IntervalIterator current; - - DisjunctionIntervalIterator(int iteratorCount) { - this.queue = new PriorityQueue(iteratorCount) { - @Override - protected boolean lessThan(IntervalIterator a, IntervalIterator b) { - return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); - } - }; - } - - @Override - public int start() { - return current.start(); - } - - @Override - public int end() { - return current.end(); - } - - @Override - public int innerWidth() { - return current.innerWidth(); - } - - /** - * Called to repopulate the interval priority queue when moving to a new document - */ - protected abstract void fillQueue(int doc) throws IOException; - - @Override - public boolean reset(int doc) throws IOException { - queue.clear(); - fillQueue(doc); - current = null; - return queue.size() > 0; - } - - @Override - public int nextInterval() throws IOException { - if (current == null) { - current = queue.top(); - return current.start(); - } - int start = current.start(), end = current.end(); - while (queue.size() > 0 && contains(queue.top(), start, end)) { - IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { - queue.add(it); - } - } - if (queue.size() == 0) { - current = 
IntervalIterator.EMPTY; - return IntervalIterator.NO_MORE_INTERVALS; - } - current = queue.top(); - return current.start(); - } - - private boolean contains(IntervalIterator it, int start, int end) { - return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); - } - -} diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java new file mode 100644 index 000000000000..053ddd4a02f5 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.util.PriorityQueue; + +class DisjunctionIntervalsSource extends IntervalsSource { + + final List subSources; + + public DisjunctionIntervalsSource(List subSources) { + this.subSources = subSources; + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + List subIterators = new ArrayList<>(); + for (IntervalsSource subSource : subSources) { + IntervalIterator it = subSource.intervals(field, ctx); + if (it != null) { + subIterators.add(it); + } + } + if (subIterators.size() == 0) + return null; + return new DisjunctionIntervalIterator(subIterators); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DisjunctionIntervalsSource that = (DisjunctionIntervalsSource) o; + return Objects.equals(subSources, that.subSources); + } + + @Override + public int hashCode() { + return Objects.hash(subSources); + } + + @Override + public String toString() { + return subSources.stream().map(Object::toString).collect(Collectors.joining(",", "or(", ")")); + } + + @Override + public void extractTerms(String field, Set terms) { + for (IntervalsSource source : subSources) { + source.extractTerms(field, terms); + } + } + + private static class DisjunctionIntervalIterator implements IntervalIterator { + + final PriorityQueue intervalQueue; + final DisiPriorityQueue disiQueue; + final DisjunctionDISIApproximation approximation; + final List iterators; + final float matchCost; + + IntervalIterator current; + + DisjunctionIntervalIterator(List iterators) { + this.iterators = iterators; + this.intervalQueue = new 
PriorityQueue(iterators.size()) { + @Override + protected boolean lessThan(IntervalIterator a, IntervalIterator b) { + return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); + } + }; + this.disiQueue = new DisiPriorityQueue(iterators.size()); + float costsum = 0; + for (IntervalIterator it : iterators) { + this.disiQueue.add(new DisiWrapper(it)); + costsum += it.cost(); + } + this.matchCost = costsum; + this.approximation = new DisjunctionDISIApproximation(this.disiQueue); + } + + @Override + public DocIdSetIterator approximation() { + return approximation; + } + + @Override + public float cost() { + return matchCost; + } + + @Override + public int start() { + return current.start(); + } + + @Override + public int end() { + return current.end(); + } + + @Override + public int innerWidth() { + return current.innerWidth(); + } + + @Override + public boolean advanceTo(int doc) throws IOException { + intervalQueue.clear(); + int approxDoc = this.approximation.docID(); + if (approxDoc > doc || (approxDoc != doc && this.approximation.advance(doc) != doc)) { + return false; + } + for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { + IntervalIterator it = dw.intervals; + if (it.advanceTo(doc)) { + it.nextInterval(); + intervalQueue.add(it); + } + } + current = null; + return intervalQueue.size() > 0; + } + + @Override + public int nextInterval() throws IOException { + if (current == null) { + current = intervalQueue.top(); + return current.start(); + } + int start = current.start(), end = current.end(); + while (intervalQueue.size() > 0 && contains(intervalQueue.top(), start, end)) { + IntervalIterator it = intervalQueue.pop(); + if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { + intervalQueue.add(it); + } + } + if (intervalQueue.size() == 0) { + current = null; + return IntervalIterator.NO_MORE_INTERVALS; + } + current = intervalQueue.top(); + return current.start(); + } + + private boolean 
contains(IntervalIterator it, int start, int end) { + return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); + } + + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index 47d37af01f3a..147b993f2d9f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -20,10 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; -import java.util.IdentityHashMap; import java.util.List; -import java.util.Map; import org.apache.lucene.util.PriorityQueue; @@ -130,13 +127,18 @@ public boolean matches() throws IOException { // implicitly verified, move it to verifiedMatches w.next = verifiedMatches; verifiedMatches = w; + + if (needsScores == false) { + // we can stop here + return true; + } } else { unverifiedMatches.add(w); } w = next; } - if (verifiedMatches != null || needsScores == false) { + if (verifiedMatches != null) { return true; } @@ -178,30 +180,6 @@ public final float score() throws IOException { return score(getSubMatches()); } - @Override - public IntervalIterator intervals(String field) { - Map subIntervals = new IdentityHashMap<>(); - for (DisiWrapper dw : subScorers) { - IntervalIterator subIt = dw.scorer.intervals(field); - if (subIt != null) - subIntervals.put(dw, subIt); - } - if (subIntervals.size() == 0) - return null; - return new DisjunctionIntervalIterator(subIntervals.size()) { - @Override - protected void fillQueue(int doc) throws IOException { - for (DisiWrapper dw = getSubMatches(); dw != null; dw = dw.next) { - IntervalIterator it = subIntervals.get(dw); - if (it.reset(doc)) { - it.nextInterval(); - queue.add(it); - } - } - } - }; - } - /** Compute the score for the given linked list of scorers. 
*/ protected abstract float score(DisiWrapper topList) throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index c69f175505fc..d7c4f9f6e2b8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -38,7 +38,6 @@ public PostingsAndPosition(PostingsEnum postings, int offset) { private final DocIdSetIterator conjunction; private final PostingsAndPosition[] postings; - private final String field; private int freq; @@ -47,17 +46,13 @@ public PostingsAndPosition(PostingsEnum postings, int offset) { private float matchCost; private float minCompetitiveScore; - private final IntervalIterator intervals; - - ExactPhraseScorer(Weight weight, String field, PhraseQuery.PostingsAndFreq[] postings, + ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, LeafSimScorer docScorer, ScoreMode scoreMode, float matchCost) throws IOException { super(weight); this.docScorer = docScorer; this.needsScores = scoreMode.needsScores(); this.needsTotalHitCount = scoreMode != ScoreMode.TOP_SCORES; - this.field = field; - this.intervals = new ExactPhraseIntervals(); List iterators = new ArrayList<>(); List postingsAndPositions = new ArrayList<>(); @@ -91,9 +86,7 @@ public boolean matches() throws IOException { return false; } } - freq = -1; - intervals.reset(docID()); - return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; + return phraseFreq() > 0; } @Override @@ -113,8 +106,7 @@ public String toString() { return "ExactPhraseScorer(" + weight + ")"; } - final int freq() throws IOException { - ensureFreq(); + final int freq() { return freq; } @@ -125,7 +117,6 @@ public int docID() { @Override public float score() throws IOException { - ensureFreq(); return docScorer.score(docID(), freq); } @@ -134,22 +125,6 @@ public float getMaxScore(int 
upTo) throws IOException { return docScorer.maxScore(); } - @Override - public IntervalIterator intervals(String field) { - if (this.field.equals(field) == false) - return null; - return new CachedIntervalIterator(intervals, this); - } - - private void ensureFreq() throws IOException { - if (freq == -1) { - freq = 1; - while (intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { - freq++; - } - } - } - /** Advance the given pos enum to the first doc on or after {@code target}. * Return {@code false} if the enum was exhausted before reaching * {@code target} and {@code true} otherwise. */ @@ -165,67 +140,52 @@ private static boolean advancePosition(PostingsAndPosition posting, int target) return true; } - private class ExactPhraseIntervals implements IntervalIterator { - - @Override - public int start() { - return postings[0].pos; + private int phraseFreq() throws IOException { + // reset state + final PostingsAndPosition[] postings = this.postings; + for (PostingsAndPosition posting : postings) { + posting.freq = posting.postings.freq(); + posting.pos = posting.postings.nextPosition(); + posting.upTo = 1; } - @Override - public int end() { - return postings[postings.length - 1].pos; - } + int freq = 0; + final PostingsAndPosition lead = postings[0]; - @Override - public int innerWidth() { - return 0; - } + advanceHead: + while (true) { + final int phrasePos = lead.pos - lead.offset; + for (int j = 1; j < postings.length; ++j) { + final PostingsAndPosition posting = postings[j]; + final int expectedPos = phrasePos + posting.offset; - @Override - public boolean reset(int doc) throws IOException { - if (conjunction.docID() != doc) - return false; - for (PostingsAndPosition posting : postings) { - posting.freq = posting.postings.freq(); - posting.pos = -1; - posting.upTo = 0; + // advance up to the same position as the lead + if (advancePosition(posting, expectedPos) == false) { + break advanceHead; + } + + if (posting.pos != expectedPos) { // we advanced 
too far + if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) { + continue advanceHead; + } else { + break advanceHead; + } + } } - return true; - } - @Override - public int nextInterval() throws IOException { - final PostingsAndPosition lead = postings[0]; - if (lead.upTo == lead.freq) - return IntervalIterator.NO_MORE_INTERVALS; + freq += 1; + if (needsScores == false) { + break; + } + if (lead.upTo == lead.freq) { + break; + } lead.pos = lead.postings.nextPosition(); lead.upTo += 1; - - advanceHead: - while (true) { - final int phrasePos = lead.pos - lead.offset; - for (int j = 1; j < postings.length; ++j) { - final PostingsAndPosition posting = postings[j]; - final int expectedPos = phrasePos + posting.offset; - - // advance up to the same position as the lead - if (advancePosition(posting, expectedPos) == false) { - return IntervalIterator.NO_MORE_INTERVALS; - } - - if (posting.pos != expectedPos) { // we advanced too far - if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) { - continue advanceHead; - } else { - return IntervalIterator.NO_MORE_INTERVALS; - } - } - } - return lead.pos; - } } + + return this.freq = freq; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java index 1fcac3a05107..c8b34381b2b2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java @@ -45,11 +45,6 @@ public float getMaxScore(int upTo) throws IOException { return Float.POSITIVE_INFINITY; } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } - @Override public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java index 
4ca9f6e01aa3..358aee4997ac 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java @@ -43,8 +43,13 @@ public int innerWidth() { } @Override - public boolean reset(int doc) throws IOException { - return in.reset(doc); + public DocIdSetIterator approximation() { + return in.approximation(); + } + + @Override + public boolean advanceTo(int doc) throws IOException { + return in.advanceTo(doc); } @Override @@ -56,4 +61,9 @@ public int nextInterval() throws IOException { public float score() { return in.score(); } + + @Override + public float cost() { + return in.cost(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java index 6de7e107300e..7bcb1ce4a64b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java @@ -67,11 +67,6 @@ public final int docID() { return in.docID(); } - @Override - public IntervalIterator intervals(String field) { - return in.intervals(field); - } - @Override public final DocIdSetIterator iterator() { return in.iterator(); diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index 9011183f7aca..da5ed036ddc0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -36,7 +36,6 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; @@ -698,7 +697,7 @@ public Weight createNormalizedWeight(Query query, 
ScoreMode scoreMode) throws IO public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException { final QueryCache queryCache = this.queryCache; Weight weight = query.createWeight(this, scoreMode, boost); - if (scoreMode.minRequiredPostings() == PostingsEnum.NONE && queryCache != null) { + if (scoreMode.needsScores() == false && queryCache != null) { weight = queryCache.doCache(weight, queryCachingPolicy); } return weight; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index deb842cb1a49..b968f88ffd96 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -22,7 +22,7 @@ /** * Wraps an {@link IntervalIterator} and passes through those intervals that match the {@link #accept()} function */ -public abstract class IntervalFilter implements IntervalIterator { +public abstract class IntervalFilter extends FilterIntervalIterator { /** * Filter an {@link IntervalIterator} by its outer width, ie the distance between the @@ -52,13 +52,11 @@ protected boolean accept() { }; } - private final IntervalIterator in; - /** * Create a new filter */ public IntervalFilter(IntervalIterator in) { - this.in = in; + super(in); } /** @@ -76,23 +74,4 @@ public final int nextInterval() throws IOException { return next; } - @Override - public final int start() { - return in.start(); - } - - @Override - public final int end() { - return in.end(); - } - - @Override - public int innerWidth() { - return in.innerWidth(); - } - - @Override - public boolean reset(int doc) throws IOException { - return in.reset(doc); - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 0a47ca1483fe..ff2a338bae8a 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -98,15 +98,13 @@ public int hashCode() { private static IntervalIterator orderedIntervalIterator(List subIterators) { for (IntervalIterator it : subIterators) { - if (it == IntervalIterator.EMPTY) - return IntervalIterator.EMPTY; + if (it == null) + return null; } return new OrderedIntervalIterator(subIterators); } - private static class OrderedIntervalIterator implements IntervalIterator { - - final List subIntervals; + private static class OrderedIntervalIterator extends ConjunctionIntervalIterator { int start; int end; @@ -114,7 +112,7 @@ private static class OrderedIntervalIterator implements IntervalIterator { int i; private OrderedIntervalIterator(List subIntervals) { - this.subIntervals = subIntervals; + super(subIntervals); } @Override @@ -133,15 +131,10 @@ public int innerWidth() { } @Override - public boolean reset(int doc) throws IOException { - boolean positioned = true; - for (IntervalIterator it : subIntervals) { - positioned &= it.reset(doc); - } - subIntervals.get(0).nextInterval(); + public void reset() throws IOException { + subIterators.get(0).nextInterval(); i = 1; start = end = innerWidth = Integer.MIN_VALUE; - return positioned; } @Override @@ -150,23 +143,23 @@ public int nextInterval() throws IOException { int b = Integer.MAX_VALUE; while (true) { while (true) { - if (subIntervals.get(i - 1).end() >= b) + if (subIterators.get(i - 1).end() >= b) return start; - if (i == subIntervals.size() || subIntervals.get(i).start() > subIntervals.get(i - 1).end()) + if (i == subIterators.size() || subIterators.get(i).start() > subIterators.get(i - 1).end()) break; do { - if (subIntervals.get(i).end() >= b || subIntervals.get(i).nextInterval() == NO_MORE_INTERVALS) + if (subIterators.get(i).end() >= b || subIterators.get(i).nextInterval() == NO_MORE_INTERVALS) return start; } - while 
(subIntervals.get(i).start() <= subIntervals.get(i - 1).end()); + while (subIterators.get(i).start() <= subIterators.get(i - 1).end()); i++; } - start = subIntervals.get(0).start(); - end = subIntervals.get(subIntervals.size() - 1).end(); - b = subIntervals.get(subIntervals.size() - 1).start(); - innerWidth = b - subIntervals.get(0).end() - 1; + start = subIterators.get(0).start(); + end = subIterators.get(subIterators.size() - 1).end(); + b = subIterators.get(subIterators.size() - 1).start(); + innerWidth = b - subIterators.get(0).end() - 1; i = 1; - if (subIntervals.get(0).nextInterval() == NO_MORE_INTERVALS) + if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) return start; } } @@ -227,14 +220,10 @@ public int hashCode() { } private static IntervalIterator unorderedIntervalIterator(List subIntervals) { - for (IntervalIterator it : subIntervals) { - if (it == IntervalIterator.EMPTY) - return IntervalIterator.EMPTY; - } return new UnorderedIntervalIterator(subIntervals); } - private static class UnorderedIntervalIterator implements IntervalIterator { + private static class UnorderedIntervalIterator extends ConjunctionIntervalIterator { private final PriorityQueue queue; private final IntervalIterator[] subIterators; @@ -242,6 +231,7 @@ private static class UnorderedIntervalIterator implements IntervalIterator { int start, end, innerStart, innerEnd, queueEnd; UnorderedIntervalIterator(List subIterators) { + super(subIterators); this.queue = new PriorityQueue(subIterators.size()) { @Override protected boolean lessThan(IntervalIterator a, IntervalIterator b) { @@ -271,24 +261,17 @@ public int innerWidth() { } @Override - public boolean reset(int doc) throws IOException { + public void reset() throws IOException { this.queue.clear(); this.queueEnd = start = end = innerEnd = innerStart = -1; - boolean positioned = true; for (IntervalIterator subIterator : subIterators) { - if (subIterator.reset(doc)) { - subIterator.nextInterval(); - queue.add(subIterator); - 
if (subIterator.end() > queueEnd) { - queueEnd = subIterator.end(); - innerEnd = subIterator.start(); - } - } - else { - positioned = false; + subIterator.nextInterval(); + queue.add(subIterator); + if (subIterator.end() > queueEnd) { + queueEnd = subIterator.end(); + innerEnd = subIterator.start(); } } - return positioned; } void updateRightExtreme(IntervalIterator it) { @@ -337,7 +320,7 @@ public IntervalIterator apply(List iterators) { throw new IllegalStateException("CONTAINING function requires two iterators"); IntervalIterator a = iterators.get(0); IntervalIterator b = iterators.get(1); - return new IntervalIterator() { + return new ConjunctionIntervalIterator(iterators) { boolean bpos; @@ -357,9 +340,8 @@ public int innerWidth() { } @Override - public boolean reset(int doc) throws IOException { - bpos = b.reset(doc); - return a.reset(doc); + public void reset() { + bpos = true; } @Override @@ -390,7 +372,7 @@ public IntervalIterator apply(List iterators) { throw new IllegalStateException("CONTAINED_BY function requires two iterators"); IntervalIterator a = iterators.get(0); IntervalIterator b = iterators.get(1); - return new IntervalIterator() { + return new ConjunctionIntervalIterator(iterators) { boolean bpos; @@ -410,9 +392,8 @@ public int innerWidth() { } @Override - public boolean reset(int doc) throws IOException { - bpos = b.reset(doc); - return a.reset(doc); + public void reset() throws IOException { + bpos = true; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index 22b6c043ce4f..d33a47530031 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -31,6 +31,10 @@ public interface IntervalIterator { */ int NO_MORE_INTERVALS = Integer.MAX_VALUE; + DocIdSetIterator approximation(); + + boolean advanceTo(int doc) throws IOException; + /** 
* The start of the current interval */ @@ -46,13 +50,6 @@ public interface IntervalIterator { */ int innerWidth(); - /** - * Called to reset the iterator on a new document - * - * @return true if the iterator's parent Scorer is positioned on the given doc id - */ - boolean reset(int doc) throws IOException; - /** * Advance the iterator to the next interval * @@ -68,36 +65,6 @@ default float score() { return (float) (1.0 / (1 + innerWidth())); } - /** - * An empty iterator that always returns {@code false} from {@link #reset(int)} and - * {@link IntervalIterator#NO_MORE_INTERVALS} from {@link #nextInterval()} - */ - IntervalIterator EMPTY = new IntervalIterator() { - - @Override - public int start() { - return -1; - } - - @Override - public int end() { - return -1; - } - - @Override - public int innerWidth() { - return 0; - } - - @Override - public boolean reset(int doc) { - return false; - } - - @Override - public int nextInterval() { - return NO_MORE_INTERVALS; - } - }; + float cost(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index 3c2683cb6046..1130d327613a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -33,24 +33,21 @@ /** * A query that retrieves documents containing intervals returned from an - * {@link IntervalFunction} over a set of subqueries + * {@link IntervalsSource} */ public final class IntervalQuery extends Query { private final String field; - private final List subQueries; - private final IntervalFunction iteratorFunction; + private final IntervalsSource intervalsSource; /** * Create a new IntervalQuery * @param field the field to query - * @param subQueries the subqueries to generate intervals from - * @param iteratorFunction an {@link IntervalFunction} to combine the intervals from the subqueries + * @param intervalsSource an {@link 
IntervalsSource} to retrieve intervals from */ - public IntervalQuery(String field, List subQueries, IntervalFunction iteratorFunction) { + public IntervalQuery(String field, IntervalsSource intervalsSource) { this.field = field; - this.subQueries = subQueries; - this.iteratorFunction = iteratorFunction; + this.intervalsSource = intervalsSource; } public String getField() { @@ -59,25 +56,18 @@ public String getField() { @Override public String toString(String field) { - return iteratorFunction.toString() + subQueries.stream().map(Object::toString) - .collect(Collectors.joining(",", "(", ")")); + return intervalsSource.toString(); } @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - List subWeights = new ArrayList<>(); - for (Query q : subQueries) { - subWeights.add(searcher.createWeight(q, ScoreMode.COMPLETE_POSITIONS, boost)); - } - return new IntervalWeight(this, subWeights, scoreMode.needsScores() ? buildSimScorer(field, searcher, subWeights, boost) : null, + return new IntervalWeight(this, scoreMode.needsScores() ? 
buildSimScorer(searcher, boost) : null, searcher.getSimilarity(), scoreMode); } - static Similarity.SimScorer buildSimScorer(String field, IndexSearcher searcher, List subWeights, float boost) throws IOException { + private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, float boost) throws IOException { Set terms = new HashSet<>(); - for (Weight w : subWeights) { - w.extractTerms(terms); - } + intervalsSource.extractTerms(field, terms); TermStatistics[] termStats = new TermStatistics[terms.size()]; int termUpTo = 0; for (Term term : terms) { @@ -96,25 +86,22 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; IntervalQuery that = (IntervalQuery) o; return Objects.equals(field, that.field) && - Objects.equals(subQueries, that.subQueries) && - Objects.equals(iteratorFunction, that.iteratorFunction); + Objects.equals(intervalsSource, that.intervalsSource); } @Override public int hashCode() { - return Objects.hash(field, subQueries, iteratorFunction); + return Objects.hash(field, intervalsSource); } private class IntervalWeight extends Weight { - final List subWeights; final Similarity.SimScorer simScorer; final Similarity similarity; final ScoreMode scoreMode; - public IntervalWeight(Query query, List subWeights, Similarity.SimScorer simScorer, Similarity similarity, ScoreMode scoreMode) { + public IntervalWeight(Query query, Similarity.SimScorer simScorer, Similarity similarity, ScoreMode scoreMode) { super(query); - this.subWeights = subWeights; this.simScorer = simScorer; this.similarity = similarity; this.scoreMode = scoreMode; @@ -122,9 +109,7 @@ public IntervalWeight(Query query, List subWeights, Similarity.SimScorer @Override public void extractTerms(Set terms) { - for (Weight w : subWeights) { - w.extractTerms(terms); - } + intervalsSource.extractTerms(field, terms); } @Override @@ -141,30 +126,16 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio @Override public Scorer 
scorer(LeafReaderContext context) throws IOException { - List subIntervals = new ArrayList<>(); - List disis = new ArrayList<>(); - for (Weight w : subWeights) { - Scorer scorer = w.scorer(context); - if (scorer == null) - return null; - disis.add(scorer.iterator()); - IntervalIterator it = scorer.intervals(field); - if (it == null) - return null; - subIntervals.add(it); - } - IntervalIterator intervals = IntervalQuery.this.iteratorFunction.apply(subIntervals); + IntervalIterator intervals = intervalsSource.intervals(field, context); + if (intervals == null) + return null; LeafSimScorer leafScorer = simScorer == null ? null : new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE); - return new IntervalScorer(this, field, ConjunctionDISI.intersectIterators(disis), intervals, leafScorer); + return new IntervalScorer(this, intervals, leafScorer); } @Override public boolean isCacheable(LeafReaderContext ctx) { - for (Weight w : subWeights) { - if (w.isCacheable(ctx) == false) - return false; - } return true; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index 8d15dcf626fa..d70e93d7705c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -21,21 +21,18 @@ class IntervalScorer extends Scorer { - protected final IntervalIterator intervals; - private final String field; - protected final DocIdSetIterator approximation; + private final IntervalIterator intervals; + private final DocIdSetIterator approximation; private final LeafSimScorer simScorer; private float freq = -1; private int lastScoredDoc = -1; - protected IntervalScorer(Weight weight, String field, DocIdSetIterator approximation, - IntervalIterator intervals, LeafSimScorer simScorer) { + protected IntervalScorer(Weight weight, IntervalIterator intervals, LeafSimScorer 
simScorer) { super(weight); this.intervals = intervals; - this.approximation = approximation; + this.approximation = intervals.approximation(); this.simScorer = simScorer; - this.field = field; } @Override @@ -74,13 +71,6 @@ private void ensureFreq() throws IOException { } } - @Override - public IntervalIterator intervals(String field) { - if (this.field.equals(field)) - return new CachedIntervalIterator(intervals, this); - return null; - } - @Override public DocIdSetIterator iterator() { return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); @@ -91,12 +81,12 @@ public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { - return intervals.reset(approximation.docID()) && intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; + return intervals.advanceTo(docID()) && intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; } @Override public float matchCost() { - return 0; + return intervals.cost(); } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index bc0455289240..175b50762316 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -19,6 +19,8 @@ import java.util.Arrays; +import org.apache.lucene.util.BytesRef; + /** * Constructor functions for interval-based queries * @@ -31,189 +33,185 @@ public final class Intervals { private Intervals() {} + public static IntervalsSource term(BytesRef term) { + return new TermIntervalsSource(term); + } + + public static IntervalsSource term(String term) { + return new TermIntervalsSource(new BytesRef(term)); + } + + public static IntervalsSource phrase(String... 
terms) { + IntervalsSource[] sources = new IntervalsSource[terms.length]; + int i = 0; + for (String term : terms) { + sources[i] = term(term); + i++; + } + return orderedNear(0, sources); + } + + public static IntervalsSource or(IntervalsSource... subSources) { + if (subSources.length == 1) + return subSources[0]; + return new DisjunctionIntervalsSource(Arrays.asList(subSources)); + } + /** - * Create an ordered query with a maximum width + * Create an ordered {@link IntervalsSource} with a maximum width * - * Matches documents in which the subqueries all match in the given order, and - * in which the width of the interval over which the queries match is less than + * Returns intervals in which the subsources all appear in the given order, and + * in which the width of the interval over which the subsources appear is less than * the defined width * - * @param field the field to query * @param width the maximum width of subquery-spanning intervals that will match - * @param subQueries an ordered set of subqueries + * @param subSources an ordered set of {@link IntervalsSource} objects */ - public static Query orderedQuery(String field, int width, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(0, width)); + public static IntervalsSource orderedNear(int width, IntervalsSource... 
subSources) { + return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.OrderedNearFunction(0, width)); } /** - * Create an ordered query with a defined width range + * Create an ordered {@link IntervalsSource} with a defined width range * - * Matches documents in which the subqueries all match in the given order, and in - * which the width of the interval over which the queries match is between the + * Returns intervals in which the subsources all appear in the given order, and in + * which the width of the interval over which the subsources appear is between the * minimum and maximum defined widths * - * @param field the field to query * @param minWidth the minimum width of subquery-spanning intervals that will match * @param maxWidth the maximum width of subquery-spanning intervals that will match - * @param subQueries an ordered set of subqueries + * @param subSources an ordered set of {@link IntervalsSource} objects */ - public static Query orderedQuery(String field, int minWidth, int maxWidth, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.OrderedNearFunction(minWidth, maxWidth)); + public static IntervalsSource orderedNear(int minWidth, int maxWidth, IntervalsSource... subSources) { + return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.OrderedNearFunction(minWidth, maxWidth)); } /** - * Create an ordered query with an unbounded width range + * Create an ordered {@link IntervalsSource} with an unbounded width range * - * Matches documents in which the subqueries all match in the given order + * Returns intervals in which the subsources all appear in the given order * - * @param field the field to query - * @param subQueries an ordered set of subqueries + * @param subSources an ordered set of {@link IntervalsSource} objects */ - public static Query orderedQuery(String field, Query... 
subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.ORDERED); + public static IntervalsSource ordered(IntervalsSource... subSources) { + return new ConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.ORDERED); } /** - * Create an unordered query with a maximum width + * Create an unordered {@link IntervalsSource} with a maximum width * - * Matches documents in which the subqueries all match in any order, and in which - * the width of the interval over which the queries match is less than the + * Returns intervals in which the subsources all appear in any order, and in which + * the width of the interval over which the subsources appear is less than the * defined width * - * @param field the field to query * @param width the maximum width of subquery-spanning intervals that will match - * @param subQueries an unordered set of queries + * @param subSources an unordered set of queries */ - public static Query unorderedQuery(String field, int width, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(0, width)); + public static IntervalsSource unorderedNear(int width, IntervalsSource... 
subSources) { + return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.UnorderedNearFunction(0, width)); } /** - * Create an unordered query with a defined width range + * Create an unordered {@link IntervalsSource} with a defined width range * - * Matches documents in which the subqueries all match in any order, and in which - * the width of the interval over which the queries match is between the minimum + * Returns intervals in which the subsources all appear in any order, and in which + * the width of the interval over which the subsources appear is between the minimum * and maximum defined widths * - * @param field the field to query * @param minWidth the minimum width of subquery-spanning intervals that will match * @param maxWidth the maximum width of subquery-spanning intervals that will match - * @param subQueries an unordered set of queries + * @param subSources an unordered set of subsources */ - public static Query unorderedQuery(String field, int minWidth, int maxWidth, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), new IntervalFunction.UnorderedNearFunction(minWidth, maxWidth)); + public static IntervalsSource unorderedNear(int minWidth, int maxWidth, IntervalsSource... subSources) { + return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.UnorderedNearFunction(minWidth, maxWidth)); } /** - * Create an unordered query with an unbounded width range + * Create an unordered {@link IntervalsSource} with an unbounded width range * - * Matches documents in which all the subqueries match. This is essence a pure conjunction - * query, but it will expose iterators over those conjunctions that may then be further - * nested in other interval queries + * Returns intervals in which all the subsources appear. 
* - * @param field the field to query - * @param subQueries an unordered set of queries + * @param subSources an unordered set of queries */ - public static Query unorderedQuery(String field, Query... subQueries) { - return new IntervalQuery(field, Arrays.asList(subQueries), IntervalFunction.UNORDERED); + public static IntervalsSource unordered(IntervalsSource... subSources) { + return new ConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.UNORDERED); } /** - * Create a non-overlapping query + * Create a non-overlapping IntervalsSource * - * Matches documents that match the minuend query, except when the intervals of the minuend - * query overlap with intervals from the subtrahend query - * - * Exposes matching intervals from the minuend - * - * @param field the field to query - * @param minuend the query to filter - * @param subtrahend the query to filter by + * Returns intervals of the minuend that do not overlap with intervals from the subtrahend + + * @param minuend the {@link IntervalsSource} to filter + * @param subtrahend the {@link IntervalsSource} to filter by */ - public static Query nonOverlappingQuery(String field, Query minuend, Query subtrahend) { - return new DifferenceIntervalQuery(field, minuend, subtrahend, DifferenceIntervalFunction.NON_OVERLAPPING); + public static IntervalsSource nonOverlapping(IntervalsSource minuend, IntervalsSource subtrahend) { + return new DifferenceIntervalsSource(minuend, subtrahend, DifferenceIntervalFunction.NON_OVERLAPPING); } /** - * Create a not-within query - * - * Matches documents that match the minuend query, except when the intervals of the minuend - * query appear within a set number of positions of intervals from the subtrahend query + * Create a not-within {@link IntervalsSource} * - * Exposes matching intervals from the minuend + * Returns intervals of the minuend that do not appear within a set number of positions of + * intervals from the subtrahend query * - * @param field the field 
to query - * @param minuend the query to filter + * @param minuend the {@link IntervalsSource} to filter * @param positions the maximum distance that intervals from the minuend may occur from intervals * of the subtrahend - * @param subtrahend the query to filter by + * @param subtrahend the {@link IntervalsSource} to filter by */ - public static Query notWithinQuery(String field, Query minuend, int positions, Query subtrahend) { - return new DifferenceIntervalQuery(field, minuend, subtrahend, new DifferenceIntervalFunction.NotWithinFunction(positions)); + public static IntervalsSource notWithin(IntervalsSource minuend, int positions, IntervalsSource subtrahend) { + return new DifferenceIntervalsSource(minuend, subtrahend, new DifferenceIntervalFunction.NotWithinFunction(positions)); } /** - * Create a not-containing query + * Create a not-containing {@link IntervalsSource} * - * Matches documents that match the minuend query, except when the intervals of the minuend - * query are contained within an interval of the subtrahend query + * Returns intervals from the minuend that do not contain intervals of the subtrahend * - * Exposes matching intervals from the minuend - * - * @param field the field to query - * @param minuend the query to filter - * @param subtrahend the query to filter by + * @param minuend the {@link IntervalsSource} to filter + * @param subtrahend the {@link IntervalsSource} to filter by */ - public static Query notContainingQuery(String field, Query minuend, Query subtrahend) { - return new DifferenceIntervalQuery(field, minuend, subtrahend, DifferenceIntervalFunction.NOT_CONTAINING); + public static IntervalsSource notContaining(IntervalsSource minuend, IntervalsSource subtrahend) { + return new DifferenceIntervalsSource(minuend, subtrahend, DifferenceIntervalFunction.NOT_CONTAINING); } /** - * Create a containing query - * - * Matches documents where intervals of the big query contain one or more intervals from - * the small query + * Create a 
containing {@link IntervalsSource} * - * Exposes matching intervals from the big query + * Returns intervals from the big source that contain one or more intervals from + * the small source * - * @param field the field to query - * @param big the query to filter - * @param small the query to filter by + * @param big the {@link IntervalsSource} to filter + * @param small the {@link IntervalsSource} to filter by */ - public static Query containingQuery(String field, Query big, Query small) { - return new IntervalQuery(field, Arrays.asList(big, small), IntervalFunction.CONTAINING); + public static IntervalsSource containing(IntervalsSource big, IntervalsSource small) { + return new ConjunctionIntervalsSource(Arrays.asList(big, small), IntervalFunction.CONTAINING); } /** - * Create a not-contained-by query + * Create a not-contained-by {@link IntervalsSource} * - * Matches documents that match the small query, except when the intervals of the small - * query are contained within an interval of the big query + * Returns intervals from the small {@link IntervalsSource} that do not appear within + * intervals from the big {@link IntervalsSource}. 
* - * Exposes matching intervals from the small query - * - * @param field the field to query - * @param small the query to filter - * @param big the query to filter by + * @param small the {@link IntervalsSource} to filter + * @param big the {@link IntervalsSource} to filter by */ - public static Query notContainedByQuery(String field, Query small, Query big) { - return new DifferenceIntervalQuery(field, small, big, DifferenceIntervalFunction.NOT_CONTAINED_BY); + public static IntervalsSource notContainedBy(IntervalsSource small, IntervalsSource big) { + return new DifferenceIntervalsSource(small, big, DifferenceIntervalFunction.NOT_CONTAINED_BY); } /** - * Create a contained-by query - * - * Matches documents where intervals of the small query occur within intervals of the big query + * Create a contained-by {@link IntervalsSource} * - * Exposes matching intervals from the small query + * Returns intervals from the small query that appear within intervals of the big query * - * @param field the field to query - * @param small the query to filter - * @param big the query to filter by + * @param small the {@link IntervalsSource} to filter + * @param big the {@link IntervalsSource} to filter by */ - public static Query containedByQuery(String field, Query small, Query big) { - return new IntervalQuery(field, Arrays.asList(small, big), IntervalFunction.CONTAINED_BY); + public static IntervalsSource containedBy(IntervalsSource small, IntervalsSource big) { + return new ConjunctionIntervalsSource(Arrays.asList(small, big), IntervalFunction.CONTAINED_BY); } // TODO: beforeQuery, afterQuery, arbitrary IntervalFunctions diff --git a/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java similarity index 54% rename from lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java rename to lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java index 
5501cffae316..3bdf1e50655d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CachedIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java @@ -18,37 +18,23 @@ package org.apache.lucene.search; import java.io.IOException; +import java.util.Set; -/** - * An interval iterator which caches its first invocation. - * - * Useful for two-phase queries that confirm matches by checking that at least one - * interval exists in a given document - */ -class CachedIntervalIterator extends FilterIntervalIterator { +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; - final Scorer scorer; +public abstract class IntervalsSource { - private boolean started = false; + public abstract IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException; - CachedIntervalIterator(IntervalIterator in, Scorer scorer) { - super(in); - this.scorer = scorer; - } + @Override + public abstract int hashCode(); @Override - public boolean reset(int doc) throws IOException { - // inner iterator already reset() in TwoPhaseIterator.matches() - started = false; - return doc == scorer.docID(); - } + public abstract boolean equals(Object other); @Override - public int nextInterval() throws IOException { - if (started == false) { - started = true; - return start(); - } - return in.nextInterval(); - } + public abstract String toString(); + + public abstract void extractTerms(String field, Set terms); } diff --git a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java index fad7d9c99989..6ffbe340144e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java @@ -20,9 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; -import java.util.IdentityHashMap; import 
java.util.List; -import java.util.Map; import java.util.stream.LongStream; import java.util.stream.StreamSupport; @@ -327,32 +325,6 @@ public float score() throws IOException { return (float) score; } - @Override - public IntervalIterator intervals(String field) { - Map its = new IdentityHashMap<>(); - for (DisiWrapper s = lead; s != null; s = s.next) { - IntervalIterator it = s.scorer.intervals(field); - if (it != null) { - its.put(s, it); - } - } - if (its.size() == 0) - return null; - return new DisjunctionIntervalIterator(its.size()) { - @Override - protected void fillQueue(int doc) throws IOException { - updateFreq(); - for (DisiWrapper s = lead; s != null; s = s.next) { - IntervalIterator it = its.get(s); - if (it.reset(doc)) { - it.nextInterval(); - queue.add(it); - } - } - } - }; - } - @Override public float getMaxScore(int upTo) throws IOException { // TODO: implement but be careful about floating-point errors. diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 7df670d73f19..65d6631e9a7c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -193,13 +193,11 @@ private class MultiPhraseWeight extends Weight { private final Similarity.SimScorer stats; private final Map termStates = new HashMap<>(); private final ScoreMode scoreMode; - private final int postingsFlags; public MultiPhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { super(MultiPhraseQuery.this); this.scoreMode = scoreMode; - this.postingsFlags = Math.max(scoreMode.minRequiredPostings(), PostingsEnum.POSITIONS); this.similarity = searcher.getSimilarity(); final IndexReaderContext context = searcher.getTopReaderContext(); @@ -267,7 +265,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { TermState termState = 
termStates.get(term).get(context); if (termState != null) { termsEnum.seekExact(term.bytes(), termState); - postings.add(termsEnum.postings(null, this.postingsFlags)); + postings.add(termsEnum.postings(null, PostingsEnum.POSITIONS)); totalMatchCost += PhraseQuery.termPositionsCost(termsEnum); } } @@ -292,11 +290,11 @@ public Scorer scorer(LeafReaderContext context) throws IOException { } if (slop == 0) { - return new ExactPhraseScorer(this, field, postingsFreqs, + return new ExactPhraseScorer(this, postingsFreqs, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE), scoreMode, totalMatchCost); } else { - return new SloppyPhraseScorer(this, field, postingsFreqs, slop, + return new SloppyPhraseScorer(this, postingsFreqs, slop, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE), scoreMode.needsScores(), totalMatchCost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java index d39cec2293e9..640cd5f20e45 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java @@ -24,8 +24,7 @@ * Position of a term in a document that takes into account the term offset within the phrase. 
*/ final class PhrasePositions { - int realPosition; // position in doc - int position; // position in phrase + int position; // position in doc int count; // remaining pos in this doc int offset; // position in phrase final int ord; // unique across all PhrasePositions instances @@ -55,8 +54,7 @@ final void firstPosition() throws IOException { */ final boolean nextPosition() throws IOException { if (count-- > 0) { // read subsequent pos's - realPosition = postings.nextPosition(); - position = realPosition - offset; + position = postings.nextPosition() - offset; return true; } else return false; diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index a4ff6150815d..ff1538820d61 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -353,7 +353,6 @@ private class PhraseWeight extends Weight { private final Similarity similarity; private final Similarity.SimScorer stats; private final ScoreMode scoreMode; - private final int postingsFlags; private transient TermStates states[]; public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) @@ -366,7 +365,6 @@ public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throw new IllegalStateException("PhraseWeight requires that the first position is 0, call rewrite first"); } this.scoreMode = scoreMode; - this.postingsFlags = Math.max(scoreMode.minRequiredPostings(), PostingsEnum.POSITIONS); this.similarity = searcher.getSimilarity(); final IndexReaderContext context = searcher.getTopReaderContext(); states = new TermStates[terms.length]; @@ -424,7 +422,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { return null; } te.seekExact(t.bytes(), state); - PostingsEnum postingsEnum = te.postings(null, postingsFlags); + PostingsEnum postingsEnum = te.postings(null, PostingsEnum.POSITIONS); 
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t); totalMatchCost += termPositionsCost(te); } @@ -435,11 +433,11 @@ public Scorer scorer(LeafReaderContext context) throws IOException { } if (slop == 0) { // optimize exact case - return new ExactPhraseScorer(this, field, postingsFreqs, + return new ExactPhraseScorer(this, postingsFreqs, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE), scoreMode, totalMatchCost); } else { - return new SloppyPhraseScorer(this, field, postingsFreqs, slop, + return new SloppyPhraseScorer(this, postingsFreqs, slop, new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE), scoreMode.needsScores(), totalMatchCost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java index f4f91b22e0db..987293eb0476 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java @@ -61,18 +61,6 @@ private static boolean matchesOrNull(TwoPhaseIterator it) throws IOException { return it == null || it.matches(); } - @Override - public IntervalIterator intervals(String field) { - return new FilterIntervalIterator(reqScorer.intervals(field)) { - @Override - public boolean reset(int doc) throws IOException { - if (doc == ReqExclScorer.this.docID()) - return in.reset(doc); - return false; - } - }; - } - @Override public DocIdSetIterator iterator() { return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java index 3069cb1ec2da..6d93a54560d4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java @@ -184,26 +184,6 @@ public DocIdSetIterator iterator() { } } 
- @Override - public IntervalIterator intervals(String field) { - IntervalIterator reqIntervals = reqScorer.intervals(field); - IntervalIterator optIntervals = optScorer.intervals(field); - if (optIntervals == null) - return reqIntervals; - if (reqIntervals == null) - return optIntervals; - return new DisjunctionIntervalIterator(2) { - @Override - protected void fillQueue(int doc) throws IOException { - reqIntervals.reset(doc); - queue.add(reqIntervals); - positionOptionalScorers(); - if (optIntervals.reset(doc)) - queue.add(optIntervals); - } - }; - } - @Override public int docID() { return reqScorer.docID(); @@ -212,17 +192,9 @@ public int docID() { @Override public float score() throws IOException { // TODO: sum into a double and cast to float if we ever send required clauses to BS1 - positionOptionalScorers(); + int curDoc = reqScorer.docID(); float score = reqScorer.score(); - if (optScorer.docID() == reqScorer.docID()) { - score += optScorer.score(); - } - return score; - } - - private void positionOptionalScorers() throws IOException { - int curDoc = reqScorer.docID(); int optScorerDoc = optApproximation.docID(); if (optScorerDoc < curDoc) { optScorerDoc = optApproximation.advance(curDoc); @@ -230,6 +202,11 @@ private void positionOptionalScorers() throws IOException { optScorerDoc = optApproximation.nextDoc(); } } + if (optScorerDoc == curDoc) { + score += optScorer.score(); + } + + return score; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java index 815286adddc9..31a5d108fc37 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java @@ -16,8 +16,6 @@ */ package org.apache.lucene.search; -import org.apache.lucene.index.PostingsEnum; - /** * Different modes of search. 
*/ @@ -31,11 +29,6 @@ public enum ScoreMode { public boolean needsScores() { return true; } - - @Override - public int minRequiredPostings() { - return PostingsEnum.FREQS; - } }, /** @@ -47,26 +40,6 @@ public int minRequiredPostings() { public boolean needsScores() { return false; } - - @Override - public int minRequiredPostings() { - return PostingsEnum.NONE; - } - }, - - /** - * Produced scorers will allow visiting all matches, and expose positions - */ - COMPLETE_POSITIONS { - @Override - public boolean needsScores() { - return false; - } - - @Override - public int minRequiredPostings() { - return PostingsEnum.POSITIONS; - } }, /** @@ -78,22 +51,10 @@ public int minRequiredPostings() { public boolean needsScores() { return true; } - - @Override - public int minRequiredPostings() { - return PostingsEnum.FREQS; - } }; /** * Whether this {@link ScoreMode} needs to compute scores. */ public abstract boolean needsScores(); - - /** - * The minimum flags to be passed to {@link org.apache.lucene.index.TermsEnum#postings(PostingsEnum, int)} - */ - public abstract int minRequiredPostings(); - - } diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java index 9e1d46c8ecfc..81624ccac9d8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java @@ -125,20 +125,6 @@ public ChildScorer(Scorer child, String relationship) { */ public abstract DocIdSetIterator iterator(); - /** - * Return a {@link IntervalIterator} over matching intervals for a given field - * - * Consumers should call {@link IntervalIterator#reset(int)} when the parent - * Scorer's {@link DocIdSetIterator} has moved to a new document, and then - * iterate over the intervals by repeatedly calling {@link IntervalIterator#nextInterval()} - * until {@link IntervalIterator#NO_MORE_INTERVALS} is returned. 
- * - * @param field The field to retrieve intervals for - * @return an {@link IntervalIterator}, or {@code null} if no intervals are available - * for the given field - */ - public abstract IntervalIterator intervals(String field); - /** * Optional method: Return a {@link TwoPhaseIterator} view of this * {@link Scorer}. A return value of {@code null} indicates that @@ -192,5 +178,4 @@ public int advanceShallow(int target) throws IOException { * included and {@code upTo} included. */ public abstract float getMaxScore(int upTo) throws IOException; - } diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index de9546be3938..7587b37889b7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -32,8 +32,6 @@ final class SloppyPhraseScorer extends Scorer { private final DocIdSetIterator conjunction; private final PhrasePositions[] phrasePositions; - private final IntervalIterator intervals; - private final String field; private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq(). 
@@ -42,8 +40,6 @@ final class SloppyPhraseScorer extends Scorer { private final int slop; private final int numPostings; private final PhraseQueue pq; // for advancing min position - - private int start, currentEnd, nextEnd; private int end; // current largest phrase position @@ -57,15 +53,13 @@ final class SloppyPhraseScorer extends Scorer { final boolean needsScores; private final float matchCost; - SloppyPhraseScorer(Weight weight, String field, PhraseQuery.PostingsAndFreq[] postings, + SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, int slop, LeafSimScorer docScorer, boolean needsScores, float matchCost) { super(weight); this.docScorer = docScorer; this.needsScores = needsScores; this.slop = slop; - this.field = field; - this.intervals = new SloppyIntervalIterator(); this.numPostings = postings==null ? 0 : postings.length; pq = new PhraseQueue(postings.length); DocIdSetIterator[] iterators = new DocIdSetIterator[postings.length]; @@ -79,72 +73,61 @@ final class SloppyPhraseScorer extends Scorer { this.matchCost = matchCost; } - private class SloppyIntervalIterator implements IntervalIterator { - - @Override - public int start() { - return start; - } - - @Override - public int end() { - return currentEnd; - } - - @Override - public int innerWidth() { - return currentEnd - start; - } - - @Override - public boolean reset(int doc) throws IOException { - start = currentEnd = nextEnd = -1; - return initPhrasePositions(); + /** + * Score a candidate doc for all slop-valid position-combinations (matches) + * encountered while traversing/hopping the PhrasePositions. + *
    The score contribution of a match depends on the distance: + *
    - highest score for distance=0 (exact match). + *
    - score gets lower as distance gets higher. + *
    Example: for query "a b"~2, a document "x a b a y" can be scored twice: + * once for "a b" (distance=0), and once for "b a" (distance=2). + *
    Possibly not all valid combinations are encountered, because for efficiency + * we always propagate the least PhrasePosition. This allows to base on + * PriorityQueue and move forward faster. + * As result, for example, document "a b c b a" + * would score differently for queries "a b c"~4 and "c b a"~4, although + * they really are equivalent. + * Similarly, for doc "a b c b a f g", query "c b"~2 + * would get same score as "g f"~2, although "c b"~2 could be matched twice. + * We may want to fix this in the future (currently not, for performance reasons). + */ + private float phraseFreq() throws IOException { + if (!initPhrasePositions()) { + return 0.0f; } - - @Override - public int nextInterval() throws IOException { - if (pq.size() < phrasePositions.length) - return IntervalIterator.NO_MORE_INTERVALS; - currentEnd = nextEnd; - PhrasePositions pp = pq.pop(); - start = pp.realPosition; - int matchLength = end - pp.position; - int next = pq.top().position; - int nextStart = pq.top().realPosition; - while (advancePP(pp)) { - if (hasRpts && !advanceRpts(pp)) { - break; // pps exhausted - } - if (pp.position > next) { // done minimizing current match-length - if (matchLength <= slop) { - pq.add(pp); - if (pp.realPosition > nextEnd) - nextEnd = pp.realPosition; - return start; - } - pq.add(pp); - pp = pq.pop(); - next = pq.top().position; - matchLength = end - pp.position; - } else { - int matchLength2 = end - pp.position; - if (matchLength2 < matchLength) { - matchLength = matchLength2; - } - if (pp.realPosition > nextStart) { - start = nextStart; - } - else { - start = pp.realPosition; + float freq = 0.0f; + numMatches = 0; + PhrasePositions pp = pq.pop(); + int matchLength = end - pp.position; + int next = pq.top().position; + while (advancePP(pp)) { + if (hasRpts && !advanceRpts(pp)) { + break; // pps exhausted + } + if (pp.position > next) { // done minimizing current match-length + if (matchLength <= slop) { + freq += (1.0 / (1.0 + matchLength)); // score 
match + numMatches++; + if (!needsScores) { + return freq; } + } + pq.add(pp); + pp = pq.pop(); + next = pq.top().position; + matchLength = end - pp.position; + } else { + int matchLength2 = end - pp.position; + if (matchLength2 < matchLength) { + matchLength = matchLength2; } } - if (matchLength <= slop) { - return start; - } - return IntervalIterator.NO_MORE_INTERVALS; } + if (matchLength <= slop) { + freq += (1.0 / (1.0 + matchLength)); // score match + numMatches++; + } + return freq; } /** advance a PhrasePosition and update 'end', return false if exhausted */ @@ -259,9 +242,6 @@ private void initSimple() throws IOException { if (pp.position > end) { end = pp.position; } - if (pp.realPosition > nextEnd) { - nextEnd = pp.realPosition; - } pq.add(pp); } } @@ -291,9 +271,6 @@ private void fillQueue() { if (pp.position > end) { end = pp.position; } - if (pp.realPosition > nextEnd) { - nextEnd = pp.realPosition; - } pq.add(pp); } } @@ -538,13 +515,11 @@ private HashMap termGroups(LinkedHashMap tord, Array return tg; } - int freq() throws IOException { - ensureFreq(); + int freq() { return numMatches; } - float sloppyFreq() throws IOException { - ensureFreq(); + float sloppyFreq() { return sloppyFreq; } @@ -568,36 +543,8 @@ float sloppyFreq() throws IOException { // } // } // } - - /** - * Score a candidate doc for all slop-valid position-combinations (matches) - * encountered while traversing/hopping the PhrasePositions. - *
    The score contribution of a match depends on the distance: - *
    - highest score for distance=0 (exact match). - *
    - score gets lower as distance gets higher. - *
    Example: for query "a b"~2, a document "x a b a y" can be scored twice: - * once for "a b" (distance=0), and once for "b a" (distance=2). - *
    Possibly not all valid combinations are encountered, because for efficiency - * we always propagate the least PhrasePosition. This allows to base on - * PriorityQueue and move forward faster. - * As result, for example, document "a b c b a" - * would score differently for queries "a b c"~4 and "c b a"~4, although - * they really are equivalent. - * Similarly, for doc "a b c b a f g", query "c b"~2 - * would get same score as "g f"~2, although "c b"~2 could be matched twice. - * We may want to fix this in the future (currently not, for performance reasons). - */ - private void ensureFreq() throws IOException { - if (sloppyFreq == -1) { - numMatches = 1; - sloppyFreq = intervals.score(); - while (intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { - sloppyFreq += intervals.score(); - numMatches++; - } - } - } - + + @Override public int docID() { return conjunction.docID(); @@ -605,7 +552,6 @@ public int docID() { @Override public float score() throws IOException { - ensureFreq(); return docScorer.score(docID(), sloppyFreq); } @@ -617,21 +563,13 @@ public float getMaxScore(int upTo) throws IOException { @Override public String toString() { return "scorer(" + weight + ")"; } - @Override - public IntervalIterator intervals(String field) { - if (this.field.equals(field)) - return new CachedIntervalIterator(intervals, this); - return null; - } - @Override public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(conjunction) { @Override public boolean matches() throws IOException { - sloppyFreq = -1; - intervals.reset(docID()); - return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; + sloppyFreq = phraseFreq(); // check for phrase + return sloppyFreq != 0F; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java index 00ab66610914..2a7c450805d9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java @@ -29,7 +29,6 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; @@ -209,7 +208,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { long termMaxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); totalMaxFreq += termMaxFreq; LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, termMaxFreq); - subScorers.add(new TermScorer(this, terms[i].field(), termsEnum, ScoreMode.COMPLETE, simScorer)); + subScorers.add(new TermScorer(this, termsEnum, ScoreMode.COMPLETE, simScorer)); } } if (subScorers.isEmpty()) { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java new file mode 100644 index 000000000000..c60af1273b3f --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Objects; +import java.util.Set; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.BytesRef; + +class TermIntervalsSource extends IntervalsSource { + + final BytesRef term; + + TermIntervalsSource(BytesRef term) { + this.term = term; + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + Terms terms = ctx.reader().terms(field); + if (terms == null) + return null; + TermsEnum te = terms.iterator(); + te.seekExact(term); + PostingsEnum pe = te.postings(null, PostingsEnum.POSITIONS); + float cost = PhraseQuery.termPositionsCost(te); + return new IntervalIterator() { + + int pos, upto; + + @Override + public DocIdSetIterator approximation() { + return pe; + } + + @Override + public boolean advanceTo(int doc) throws IOException { + pos = -1; + if (pe.docID() > doc || (pe.docID() != doc && pe.advance(doc) != doc)) { + upto = -1; + return false; + } + else { + upto = pe.freq(); + return true; + } + } + + @Override + public int start() { + return pos; + } + + @Override + public int end() { + return pos; + } + + @Override + public int innerWidth() { + return 1; + } + + @Override + public int nextInterval() throws IOException { + if (upto <= 0) + return pos = NO_MORE_INTERVALS; + upto--; + return pos = pe.nextPosition(); + } + + @Override + public float cost() { + return cost; + } + + @Override + public String toString() { + return pe.docID() + ":" + pos; + } + }; + } + + @Override + public int hashCode() { + return Objects.hash(term); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o 
== null || getClass() != o.getClass()) return false; + TermIntervalsSource that = (TermIntervalsSource) o; + return Objects.equals(term, that.term); + } + + @Override + public String toString() { + return term.utf8ToString(); + } + + @Override + public void extractTerms(String field, Set terms) { + terms.add(new Term(field, term)); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index 79dd976b7789..f1f44154f554 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -48,7 +48,7 @@ final class TermWeight extends Weight { private final ScoreMode scoreMode; public TermWeight(IndexSearcher searcher, ScoreMode scoreMode, - float boost, TermStates termStates) throws IOException { + float boost, TermStates termStates) throws IOException { super(TermQuery.this); if (scoreMode.needsScores() && termStates == null) { throw new IllegalStateException("termStates are required when scores are needed"); @@ -98,7 +98,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { .getIndexOptions(); float maxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), maxFreq); - return new TermScorer(this, getTerm().field(), termsEnum, scoreMode, scorer); + return new TermScorer(this, termsEnum, scoreMode, scorer); } private long getMaxFreq(IndexOptions indexOptions, long ttf, long df) { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 79f00d30cb57..d51626fda8c1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -27,13 +27,11 @@ /** Expert: A Scorer for documents matching a Term. 
*/ final class TermScorer extends Scorer { - private final PostingsEnum postingsEnum; private final ImpactsEnum impactsEnum; private final DocIdSetIterator iterator; private final LeafSimScorer docScorer; private float minCompetitiveScore; - private final String field; /** * Construct a TermScorer. @@ -45,12 +43,11 @@ final class TermScorer extends Scorer { * @param docScorer * A {@link LeafSimScorer} for the appropriate field. */ - TermScorer(Weight weight, String field, TermsEnum te, ScoreMode scoreMode, LeafSimScorer docScorer) throws IOException { + TermScorer(Weight weight, TermsEnum te, ScoreMode scoreMode, LeafSimScorer docScorer) throws IOException { super(weight); this.docScorer = docScorer; - this.field = field; if (scoreMode == ScoreMode.TOP_SCORES) { - impactsEnum = te.impacts(docScorer.getSimScorer(), scoreMode.minRequiredPostings()); + impactsEnum = te.impacts(docScorer.getSimScorer(), PostingsEnum.FREQS); postingsEnum = impactsEnum; iterator = new DocIdSetIterator() { @@ -107,7 +104,7 @@ public long cost() { } }; } else { - postingsEnum = te.postings(null, scoreMode.minRequiredPostings()); + postingsEnum = te.postings(null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE); impactsEnum = new SlowImpactsEnum(postingsEnum, docScorer.getSimScorer().score(Float.MAX_VALUE, 1)); iterator = postingsEnum; } @@ -127,14 +124,6 @@ public DocIdSetIterator iterator() { return iterator; } - @Override - public IntervalIterator intervals(String field) { - if (this.field.equals(field)) { - return new TermIntervalIterator(postingsEnum); - } - return null; - } - @Override public float score() throws IOException { assert docID() != DocIdSetIterator.NO_MORE_DOCS; @@ -159,56 +148,4 @@ public void setMinCompetitiveScore(float minScore) { /** Returns a string representation of this TermScorer. 
*/ @Override public String toString() { return "scorer(" + weight + ")[" + super.toString() + "]"; } - - private static class TermIntervalIterator implements IntervalIterator { - - public TermIntervalIterator(PostingsEnum pe) { - this.pe = pe; - } - - private final PostingsEnum pe; - - int upTo = -1; - int pos = -1; - - @Override - public int start() { - return pos; - } - - @Override - public int end() { - return pos; - } - - @Override - public int innerWidth() { - return 0; - } - - @Override - public boolean reset(int doc) throws IOException { - if (pe.docID() == doc) { - upTo = pe.freq(); - pos = -1; - return true; - } - upTo = -1; - return false; - } - - @Override - public int nextInterval() throws IOException { - if (upTo <= 0) { - return pos = NO_MORE_INTERVALS; - } - upTo--; - return pos = pe.nextPosition(); - } - - @Override - public String toString() { - return pe.docID() + "[" + pos + "]"; - } - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java index f4ef706fcf1d..f7a88f15927e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java @@ -440,11 +440,6 @@ private void advanceAllTail() throws IOException { assert ensureConsistent(); } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } - @Override public float score() throws IOException { // we need to know about all matches diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 3106e1962a59..7853ccf2465b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -23,7 +23,6 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReader; import 
org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.util.Bits; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java index 2ec0c5d2b372..666f163742a3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -21,7 +21,6 @@ import java.util.Objects; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IntervalIterator; import org.apache.lucene.search.LeafSimScorer; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TwoPhaseIterator; @@ -32,7 +31,6 @@ */ public class SpanScorer extends Scorer { - protected final String field; protected final Spans spans; protected final LeafSimScorer docScorer; @@ -43,11 +41,10 @@ public class SpanScorer extends Scorer { private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for /** Sole constructor. 
*/ - public SpanScorer(SpanWeight weight, String field, Spans spans, LeafSimScorer docScorer) { + public SpanScorer(SpanWeight weight, Spans spans, LeafSimScorer docScorer) { super(weight); this.spans = Objects.requireNonNull(spans); this.docScorer = docScorer; - this.field = field; } /** return the Spans for this Scorer **/ @@ -60,13 +57,6 @@ public int docID() { return spans.docID(); } - @Override - public IntervalIterator intervals(String field) { - if (this.field.equals(field)) - return new SpanIntervalIterator(); - return null; - } - @Override public DocIdSetIterator iterator() { return spans; @@ -156,32 +146,4 @@ final float sloppyFreq() throws IOException { return freq; } - private class SpanIntervalIterator implements IntervalIterator { - - @Override - public int start() { - return spans.startPosition(); - } - - @Override - public int end() { - return spans.endPosition() - 1; - } - - @Override - public int innerWidth() { - return spans.width(); - } - - @Override - public boolean reset(int doc) throws IOException { - return spans.docID() == doc; - } - - @Override - public int nextInterval() throws IOException { - return spans.nextStartPosition(); - } - } - } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index f19ca742076f..25b58fdc39a0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -130,7 +130,7 @@ public SpanScorer scorer(LeafReaderContext context) throws IOException { return null; } final LeafSimScorer docScorer = getSimScorer(context); - return new SpanScorer(this, field, spans, docScorer); + return new SpanScorer(this, spans, docScorer); } /** diff --git a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java index f80bd5d82d63..1657f9b9ced1 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java +++ b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -189,11 +189,6 @@ public int docID() { public DocIdSetIterator iterator() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } - - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } } static final class JustCompileSimilarity extends Similarity { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java index 21b2ea3285f2..3118fa85394c 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java @@ -59,11 +59,6 @@ public DocIdSetIterator iterator() { return it; } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } - @Override public String toString() { return "FakeScorer(cost=" + it.cost() + ")"; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java index 3933b07e02c8..12136b5b318a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java @@ -44,11 +44,6 @@ private MockScorer() { public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); } - - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } } private static class NoOpCollector extends SimpleCollector { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java index f105216baae5..083ac248df91 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java @@ -150,11 +150,6 @@ public float score() throws IOException { public float getMaxScore(int upTo) throws IOException { return 0; } - - @Override - public IntervalIterator intervals(String field) { - return null; - } }; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 535d31dd0fa2..33fd8c18065f 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -73,7 +73,7 @@ private void checkHits(Query query, int[] results) throws IOException { public void testScoring() throws IOException { PhraseQuery pq = new PhraseQuery.Builder().add(new Term(field, "w2")).add(new Term(field, "w3")).build(); - Query equiv = Intervals.orderedQuery(field, 0, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w3"))); + Query equiv = new IntervalQuery(field, Intervals.phrase("w2", "w3")); TopDocs td1 = searcher.search(pq, 10); TopDocs td2 = searcher.search(equiv, 10); @@ -85,100 +85,76 @@ public void testScoring() throws IOException { } public void testOrderedNearQueryWidth0() throws IOException { - checkHits(Intervals.orderedQuery(field, 0, new TermQuery(new Term(field, "w1")), - new TermQuery(new Term(field, "w2"))), + checkHits(new IntervalQuery(field, Intervals.orderedNear(0, Intervals.term("w1"), Intervals.term("w2"))), new int[]{0}); } public void testOrderedNearQueryWidth1() throws IOException { - checkHits(Intervals.orderedQuery(field, 1, new TermQuery(new Term(field, "w1")), - new TermQuery(new Term(field, "w2"))), + checkHits(new IntervalQuery(field, Intervals.orderedNear(1, Intervals.term("w1"), Intervals.term("w2"))), new int[]{0, 1, 2, 5}); } public void testOrderedNearQueryWidth2() throws IOException { 
- checkHits(Intervals.orderedQuery(field, 2, new TermQuery(new Term(field, "w1")), - new TermQuery(new Term(field, "w2"))), + checkHits(new IntervalQuery(field, Intervals.orderedNear(2, Intervals.term("w1"), Intervals.term("w2"))), new int[]{0, 1, 2, 3, 5}); } public void testNestedOrderedNearQuery() throws IOException { // onear/1(w1, onear/2(w2, w3)) - Query q = Intervals.orderedQuery(field, 1, - new TermQuery(new Term(field, "w1")), - Intervals.orderedQuery(field, 2, - new TermQuery(new Term(field, "w2")), - new TermQuery(new Term(field, "w3"))) - ); + Query q = new IntervalQuery(field, + Intervals.orderedNear(1, + Intervals.term("w1"), + Intervals.orderedNear(2, Intervals.term("w2"), Intervals.term("w3")))); checkHits(q, new int[]{0, 1, 2}); } - public void testNearPhraseQuery() throws IOException { - Query q = Intervals.unorderedQuery(field, - new PhraseQuery.Builder().add(new Term(field, "w3")).add(new Term(field, "w2")).build(), - new TermQuery(new Term(field, "w4"))); - checkHits(q, new int[]{ 5 }); - } - - public void testSloppyPhraseQuery() throws IOException { - Query q = Intervals.unorderedQuery(field, - new PhraseQuery.Builder().add(new Term(field, "w3")).add(new Term(field, "w2")).setSlop(2).build(), - new TermQuery(new Term(field, "w4"))); - checkHits(q, new int[]{ 0, 5 }); - } - public void testUnorderedQuery() throws IOException { - Query q = Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))); + Query q = new IntervalQuery(field, Intervals.unordered(Intervals.term("w1"), Intervals.term("w3"))); checkHits(q, new int[]{0, 1, 2, 3, 5}); } public void testNonOverlappingQuery() throws IOException { - Query q = Intervals.nonOverlappingQuery(field, - Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w3"))), - Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w2")), new TermQuery(new Term(field, "w4")))); - + Query q = new 
IntervalQuery(field, Intervals.nonOverlapping( + Intervals.unordered(Intervals.term("w1"), Intervals.term("w3")), + Intervals.unordered(Intervals.term("w2"), Intervals.term("w4")))); checkHits(q, new int[]{1, 3, 5}); } public void testNotWithinQuery() throws IOException { - Query q = Intervals.notWithinQuery(field, new TermQuery(new Term(field, "w1")), 1, - new TermQuery(new Term(field, "w2"))); + Query q = new IntervalQuery(field, Intervals.notWithin(Intervals.term("w1"), 1, Intervals.term("w2"))); checkHits(q, new int[]{ 1, 2, 3 }); } public void testNotContainingQuery() throws IOException { - Query q = Intervals.notContainingQuery(field, - Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), - new TermQuery(new Term(field, "w3"))); - + Query q = new IntervalQuery(field, Intervals.notContaining( + Intervals.unordered(Intervals.term("w1"), Intervals.term("w2")), + Intervals.term("w3") + )); checkHits(q, new int[]{ 0, 2, 4, 5 }); } public void testContainingQuery() throws IOException { - Query q = Intervals.containingQuery(field, - Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2"))), - new TermQuery(new Term(field, "w3"))); - + Query q = new IntervalQuery(field, Intervals.containing( + Intervals.unordered(Intervals.term("w1"), Intervals.term("w2")), + Intervals.term("w3") + )); checkHits(q, new int[]{ 1, 3, 5 }); } public void testContainedByQuery() throws IOException { - Query q = Intervals.containedByQuery(field, - new TermQuery(new Term(field, "w3")), - Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w2")))); + Query q = new IntervalQuery(field, Intervals.containedBy( + Intervals.term("w3"), + Intervals.unordered(Intervals.term("w1"), Intervals.term("w2")))); checkHits(q, new int[]{ 1, 3, 5 }); } public void testNotContainedByQuery() throws IOException { - Query q = 
Intervals.notContainedByQuery(field, - new TermQuery(new Term(field, "w2")), - Intervals.unorderedQuery(field, new TermQuery(new Term(field, "w1")), new TermQuery(new Term(field, "w4")))); + Query q = new IntervalQuery(field, Intervals.notContainedBy( + Intervals.term("w2"), + Intervals.unordered(Intervals.term("w1"), Intervals.term("w4")) + )); checkHits(q, new int[]{ 1, 3, 4, 5 }); } - // contained-by - // not-contained-by - - // TODO: Overlapping } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 339b4f657f62..5f5222ea41d6 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -84,22 +84,18 @@ public static void teardownIndex() throws IOException { IOUtils.close(searcher.getIndexReader(), directory); } - private void checkIntervals(Query query, String field, int expectedMatchCount, int[][] expected) throws IOException { - Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE_POSITIONS, 1f); + private void checkIntervals(IntervalsSource source, String field, int expectedMatchCount, int[][] expected) throws IOException { int matchedDocs = 0; for (LeafReaderContext ctx : searcher.leafContexts) { - Scorer scorer = weight.scorer(ctx); - if (scorer == null) - continue; - assertNull(scorer.intervals(field + "1")); + assertNull(source.intervals(field + "1", ctx)); NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); - IntervalIterator intervals = scorer.intervals(field); - DocIdSetIterator it = scorer.iterator(); - for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { - matchedDocs++; + IntervalIterator intervals = source.intervals(field, ctx); + if (intervals == null) + continue; + for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { ids.advance(doc); int id = (int) ids.longValue(); - if (intervals.reset(doc)) { + 
if (intervals.advanceTo(doc)) { int i = 0, pos; while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) { System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); @@ -109,6 +105,8 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i i += 2; } assertEquals(expected[id].length, i); + if (i > 0) + matchedDocs++; } else { assertEquals(0, expected[id].length); @@ -119,7 +117,7 @@ private void checkIntervals(Query query, String field, int expectedMatchCount, i } public void testTermQueryIntervals() throws IOException { - checkIntervals(new TermQuery(new Term("field1", "porridge")), "field1", 4, new int[][]{ + checkIntervals(Intervals.term("porridge"), "field1", 4, new int[][]{ {}, { 1, 1, 4, 4, 7, 7 }, { 1, 1, 4, 4, 7, 7 }, @@ -129,38 +127,8 @@ public void testTermQueryIntervals() throws IOException { }); } - public void testExactPhraseQueryIntervals() throws IOException { - checkIntervals(new PhraseQuery.Builder() - .add(new Term("field1", "pease")) - .add(new Term("field1", "porridge")).build(), "field1", 3, new int[][]{ - {}, - { 0, 1, 3, 4, 6, 7 }, - { 0, 1, 3, 4, 6, 7 }, - {}, - { 0, 1, 3, 4, 6, 7 }, - {} - }); - } - - public void testSloppyPhraseQueryIntervals() throws IOException { - checkIntervals(new PhraseQuery.Builder() - .add(new Term("field1", "pease")) - .add(new Term("field1", "porridge")) - .add(new Term("field1", "hot")) - .setSlop(3).build(), "field1", 3, new int[][]{ - {}, - { 0, 2, 1, 3, 2, 4 }, - { 0, 5, 3, 5, 3, 7, 5, 7 }, - {}, - { 0, 2, 1, 3, 2, 4 }, - {} - } - ); - } - public void testOrderedNearIntervals() throws IOException { - checkIntervals(Intervals.orderedQuery("field1", 100, - new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), + checkIntervals(Intervals.ordered(Intervals.term("pease"), Intervals.term("hot")), "field1", 3, new int[][]{ {}, { 0, 2, 6, 17 }, @@ -172,8 +140,7 @@ public void testOrderedNearIntervals() throws IOException 
{ } public void testUnorderedNearIntervals() throws IOException { - checkIntervals(Intervals.unorderedQuery("field1", 100, - new TermQuery(new Term("field1", "pease")), new TermQuery(new Term("field1", "hot"))), + checkIntervals(Intervals.unordered(Intervals.term("pease"), Intervals.term("hot")), "field1", 4, new int[][]{ {}, { 0, 2, 2, 3, 6, 17 }, @@ -185,10 +152,7 @@ public void testUnorderedNearIntervals() throws IOException { } public void testIntervalDisjunction() throws IOException { - checkIntervals(new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD) - .add(new TermQuery(new Term("field1", "hot")), BooleanClause.Occur.SHOULD) - .build(), "field1", 4, new int[][]{ + checkIntervals(Intervals.or(Intervals.term("pease"), Intervals.term("hot")), "field1", 4, new int[][]{ {}, { 0, 0, 2, 2, 3, 3, 6, 6, 17, 17}, { 0, 0, 3, 3, 5, 5, 6, 6, 21, 21}, @@ -199,13 +163,8 @@ public void testIntervalDisjunction() throws IOException { } public void testNesting() throws IOException { - checkIntervals(Intervals.unorderedQuery("field1", 100, - new TermQuery(new Term("field1", "pease")), - new TermQuery(new Term("field1", "porridge")), - new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "hot")), BooleanClause.Occur.SHOULD) - .add(new TermQuery(new Term("field1", "cold")), BooleanClause.Occur.SHOULD) - .build()), "field1", 3, new int[][]{ + checkIntervals(Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.or(Intervals.term("hot"), Intervals.term("cold"))), + "field1", 3, new int[][]{ {}, { 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 17 }, { 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 17 }, @@ -215,81 +174,4 @@ public void testNesting() throws IOException { }); } - // x near ((a not b) or (c not d)) - public void testExclusionBooleans() throws IOException { - checkIntervals(Intervals.unorderedQuery("field1", - new TermQuery(new Term("field1", "pease")), - new BooleanQuery.Builder() - 
.add(new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "nine")), BooleanClause.Occur.MUST) - .add(new TermQuery(new Term("field1", "years")), BooleanClause.Occur.MUST_NOT) - .build(), BooleanClause.Occur.SHOULD) - .add(new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "twelve")), BooleanClause.Occur.MUST) - .add(new TermQuery(new Term("field1", "days")), BooleanClause.Occur.MUST_NOT) - .build(), BooleanClause.Occur.SHOULD) - .build()), "field1", 2, new int[][]{ - {}, - { 6, 11 }, - {}, - {}, - { 6, 21 }, - {} - }); - } - - public void testConjunctionBooleans() throws IOException { - checkIntervals(Intervals.unorderedQuery("field1", - new TermQuery(new Term("field1", "pease")), - new BooleanQuery.Builder() - .add(new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "nine")), BooleanClause.Occur.MUST) - .add(new TermQuery(new Term("field2", "caverns")), BooleanClause.Occur.MUST) - .build(), BooleanClause.Occur.SHOULD) - .add(new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "twelve")), BooleanClause.Occur.MUST) - .add(new TermQuery(new Term("field2", "sunless")), BooleanClause.Occur.MUST) - .build(), BooleanClause.Occur.SHOULD) - .build()), "field1", 2, new int[][]{ - {}, - { 6, 11 }, - { 6, 11 }, - {}, - {}, - {} - }); - } - - public void testMinimumShouldMatch() throws IOException { - checkIntervals(new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD) - .add(new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "porridge")), BooleanClause.Occur.SHOULD) - .add(new TermQuery(new Term("field1", "days")), BooleanClause.Occur.SHOULD) - .add(new TermQuery(new Term("field1", "fraggle")), BooleanClause.Occur.SHOULD) - .setMinimumNumberShouldMatch(2) - .build(), BooleanClause.Occur.SHOULD) - .build(), "field1", 4, new int[][]{ - {}, - { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 12, 12, 29, 29 }, - { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 12, 12, 27, 27 
}, - { 7, 7 }, - { 0, 0, 3, 3, 6, 6 }, - {} - }); - } - - public void testSpanNearQueryEquivalence() throws IOException { - checkIntervals(new SpanNearQuery(new SpanQuery[]{ - new SpanTermQuery(new Term("field1", "pease")), - new SpanTermQuery(new Term("field1", "hot"))}, 100, true), - "field1", 3, new int[][]{ - {}, - {0, 2, 3, 17, 6, 17}, - {0, 5, 3, 5, 6, 21}, - {}, - { 0, 2, 3, 17, 6, 17 }, - { } - }); - } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java index 0f90b1c18a94..f60435c57a30 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java @@ -402,10 +402,5 @@ public int docID() { } }; } - - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java index 81855bb4dc84..9fbd6a46b56f 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java @@ -69,11 +69,6 @@ public long cost() { } }; } - - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } } // The scores must have positive as well as negative values diff --git a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java index eb46ab49e466..d1f307d063ec 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java @@ -437,11 +437,6 @@ public int docID() { return docID; } - @Override - public IntervalIterator 
intervals(String field) { - throw new UnsupportedOperationException(); - } - @Override public DocIdSetIterator iterator() { return new DocIdSetIterator() { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java index 900267166894..257310176740 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java @@ -50,11 +50,6 @@ public float getMaxScore(int upTo) throws IOException { @Override public int docID() { return doc; } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } - @Override public DocIdSetIterator iterator() { return new DocIdSetIterator() { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java index c0f6b2401cb1..17c5f85dd898 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java @@ -245,11 +245,6 @@ public float getMaxScore(int upTo) throws IOException { public DocIdSetIterator iterator() { throw new UnsupportedOperationException(); } - - @Override - public IntervalIterator intervals(String field) { - return null; - } } public void testSetMinCompetitiveScore() throws Exception { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java index 6600b6ee92e4..f12e9100d656 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java @@ -264,12 +264,7 @@ public int docID() { public DocIdSetIterator iterator() { return scorer.iterator(); } - - 
@Override - public IntervalIterator intervals(String field) { - return scorer.intervals(field); - } - + }; super.setScorer(s); } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java index 5b5d5812559b..bd5d927c6275 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java @@ -151,7 +151,7 @@ public SpanScorer scorer(LeafReaderContext context) throws IOException { return null; LeafSimScorer docScorer = innerWeight.getSimScorer(context); PayloadSpans payloadSpans = new PayloadSpans(spans, decoder); - return new PayloadSpanScorer(this, field, payloadSpans, docScorer); + return new PayloadSpanScorer(this, payloadSpans, docScorer); } @Override @@ -227,8 +227,8 @@ private class PayloadSpanScorer extends SpanScorer { private final PayloadSpans spans; - private PayloadSpanScorer(SpanWeight weight, String field, PayloadSpans spans, LeafSimScorer docScorer) throws IOException { - super(weight, field, spans, docScorer); + private PayloadSpanScorer(SpanWeight weight, PayloadSpans spans, LeafSimScorer docScorer) throws IOException { + super(weight, spans, docScorer); this.spans = spans; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java index dbee623c59b6..a9d3bfb2da9a 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java @@ -128,7 +128,7 @@ public SpanScorer scorer(LeafReaderContext context) throws IOException { return null; } final LeafSimScorer docScorer = getSimScorer(context); - return new SpanScorer(this, field, spans, docScorer); + return new 
SpanScorer(this, spans, docScorer); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java index e6eeae907da8..80cd4da7cf0d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java @@ -112,11 +112,6 @@ public Collection getChildren() { return Collections.singletonList(new ChildScorer(in, "SHOULD")); } - @Override - public IntervalIterator intervals(String field) { - return in.intervals(field); - } - @Override public int docID() { return in.docID(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java index 4b982bb45a8d..3b9a740a448f 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/BlockScoreQueryWrapper.java @@ -196,10 +196,6 @@ public float getMaxScore(int upTo) throws IOException { return max; } - @Override - public IntervalIterator intervals(String field) { - throw new UnsupportedOperationException(); - } }; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java index a8ef239d93cb..9206b0484d4d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/BulkScorerWrapperScorer.java @@ -115,9 +115,4 @@ public long cost() { }; } - @Override - public IntervalIterator intervals(String field) { - return null; - } - } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java 
b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java index fcb48a8d0e84..a050b50401cb 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java @@ -108,11 +108,6 @@ public float score() throws IOException { return scorer.score(); } - @Override - public IntervalIterator intervals(String field) { - return scorer.intervals(field); - } - @Override public int advanceShallow(int target) throws IOException { return scorer.advanceShallow(target); From fed92ee7ebd9fc6b52aa006429d090d5e4732f22 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 6 Mar 2018 18:16:43 +0000 Subject: [PATCH 60/83] Fix nested disjunctions (LUCENE-7398) --- .../search/ConjunctionIntervalIterator.java | 4 + .../search/DisjunctionIntervalsSource.java | 90 ++++++++++++++++++- .../apache/lucene/search/IntervalFilter.java | 5 ++ .../lucene/search/IntervalFunction.java | 5 +- .../lucene/search/TestIntervalQuery.java | 12 ++- 5 files changed, 110 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java index 53139fe42ec8..f6457d2ffd75 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java @@ -63,4 +63,8 @@ public final float cost() { return cost; } + @Override + public String toString() { + return approximation.docID() + ":[" + start() + "->" + end() + "]"; + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java index 053ddd4a02f5..025bba1cdb19 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -90,7 +90,10 @@ private static class DisjunctionIntervalIterator implements IntervalIterator { this.intervalQueue = new PriorityQueue(iterators.size()) { @Override protected boolean lessThan(IntervalIterator a, IntervalIterator b) { - return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); + // This is different to the Vigna paper, because we're interested in matching rather + // than in minimizing intervals, so a wider interval should sort before its prefixes + return a.start() < b.start() || (a.start() == b.start() && a.end() > b.end()); + //return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); } }; this.disiQueue = new DisiPriorityQueue(iterators.size()); @@ -142,13 +145,13 @@ public boolean advanceTo(int doc) throws IOException { intervalQueue.add(it); } } - current = null; + current = UNPOSITIONED; return intervalQueue.size() > 0; } @Override public int nextInterval() throws IOException { - if (current == null) { + if (current == UNPOSITIONED) { current = intervalQueue.top(); return current.start(); } @@ -160,16 +163,95 @@ public int nextInterval() throws IOException { } } if (intervalQueue.size() == 0) { - current = null; + current = EMPTY; return IntervalIterator.NO_MORE_INTERVALS; } current = intervalQueue.top(); return current.start(); } + @Override + public String toString() { + return approximation.docID() + ":[" + start() + "->" + end() + "]"; + } + private boolean contains(IntervalIterator it, int start, int end) { return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); } } + + private static final IntervalIterator EMPTY = new IntervalIterator() { + @Override + public DocIdSetIterator approximation() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean advanceTo(int doc) throws IOException { + return false; + } + + @Override + public int start() { + return 
NO_MORE_INTERVALS; + } + + @Override + public int end() { + return NO_MORE_INTERVALS; + } + + @Override + public int innerWidth() { + throw new UnsupportedOperationException(); + } + + @Override + public int nextInterval() throws IOException { + return NO_MORE_INTERVALS; + } + + @Override + public float cost() { + return 0; + } + }; + + private static final IntervalIterator UNPOSITIONED = new IntervalIterator() { + @Override + public DocIdSetIterator approximation() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean advanceTo(int doc) throws IOException { + return false; + } + + @Override + public int start() { + return -1; + } + + @Override + public int end() { + return -1; + } + + @Override + public int innerWidth() { + throw new UnsupportedOperationException(); + } + + @Override + public int nextInterval() throws IOException { + return NO_MORE_INTERVALS; + } + + @Override + public float cost() { + return 0; + } + }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index b968f88ffd96..e903ceb75223 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -49,6 +49,11 @@ protected boolean accept() { int width = innerWidth(); return width >= minWidth && width <= maxWidth; } + + @Override + public String toString() { + return "widthfilter(" + minWidth + "," + maxWidth + "," + in.toString() + ")"; + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index ff2a338bae8a..384e4ac0b691 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -157,7 +157,10 @@ public int nextInterval() throws IOException { start = subIterators.get(0).start(); 
end = subIterators.get(subIterators.size() - 1).end(); b = subIterators.get(subIterators.size() - 1).start(); - innerWidth = b - subIterators.get(0).end() - 1; + innerWidth = 0; + for (int j = 1; j < subIterators.size(); j++) { + innerWidth += subIterators.get(j).start() - subIterators.get(j - 1).end() - 1; + } i = 1; if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) return start; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 33fd8c18065f..e2ab2af7c972 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -64,7 +64,9 @@ public void tearDown() throws Exception { "w1 xx w2 w4 yy w3", "w1 w3 xx w2 yy w3", "w2 w1", - "w2 w1 w3 w2 w4" + "w2 w1 w3 w2 w4", + "coordinate genome mapping research", + "coordinate genome research" }; private void checkHits(Query query, int[] results) throws IOException { @@ -157,4 +159,12 @@ public void testNotContainedByQuery() throws IOException { checkHits(q, new int[]{ 1, 3, 4, 5 }); } + public void testNestedOr() throws IOException { + Query q = new IntervalQuery(field, Intervals.orderedNear(0, + Intervals.term("coordinate"), + Intervals.or(Intervals.phrase("genome", "mapping"), Intervals.term("genome")), + Intervals.term("research"))); + checkHits(q, new int[]{ 6, 7 }); + } + } From 5a58ee0766830c19bf02c64965a564ea388d28de Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 7 Mar 2018 03:18:32 +0000 Subject: [PATCH 61/83] Remove slop/innerwidth, add BLOCK and MAXWIDTH --- .../search/DifferenceIntervalFunction.java | 9 - .../search/DisjunctionIntervalsSource.java | 15 -- .../lucene/search/FilterIntervalIterator.java | 5 - .../apache/lucene/search/IntervalFilter.java | 33 ---- .../lucene/search/IntervalFunction.java | 172 ++++++------------ .../lucene/search/IntervalIterator.java | 7 +- 
.../apache/lucene/search/IntervalQuery.java | 5 +- .../org/apache/lucene/search/Intervals.java | 68 ++----- .../apache/lucene/search/IntervalsSource.java | 2 + .../lucene/search/LowpassIntervalsSource.java | 71 ++++++++ .../lucene/search/TermIntervalsSource.java | 10 +- .../lucene/search/TestIntervalQuery.java | 20 +- 12 files changed, 156 insertions(+), 261 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/LowpassIntervalsSource.java diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index 2ee7d36d78ac..e5310f9482fd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -95,11 +95,6 @@ public int end() { return a.end(); } - @Override - public int innerWidth() { - return a.innerWidth(); - } - @Override public boolean advanceTo(int doc) throws IOException { bpos = b.advanceTo(doc); @@ -191,10 +186,6 @@ public int end() { return newEnd; } - @Override - public int innerWidth() { - throw new UnsupportedOperationException(); - } }; return NON_OVERLAPPING.apply(minuend, notWithin); } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java index 025bba1cdb19..1327c131d894 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -126,11 +126,6 @@ public int end() { return current.end(); } - @Override - public int innerWidth() { - return current.innerWidth(); - } - @Override public boolean advanceTo(int doc) throws IOException { intervalQueue.clear(); @@ -202,11 +197,6 @@ public int end() { return NO_MORE_INTERVALS; } - @Override - public int innerWidth() { - throw new 
UnsupportedOperationException(); - } - @Override public int nextInterval() throws IOException { return NO_MORE_INTERVALS; @@ -239,11 +229,6 @@ public int end() { return -1; } - @Override - public int innerWidth() { - throw new UnsupportedOperationException(); - } - @Override public int nextInterval() throws IOException { return NO_MORE_INTERVALS; diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java index 358aee4997ac..bb4d736fe3ec 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java @@ -37,11 +37,6 @@ public int end() { return in.end(); } - @Override - public int innerWidth() { - return in.innerWidth(); - } - @Override public DocIdSetIterator approximation() { return in.approximation(); diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index e903ceb75223..aa074039bdd5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -24,39 +24,6 @@ */ public abstract class IntervalFilter extends FilterIntervalIterator { - /** - * Filter an {@link IntervalIterator} by its outer width, ie the distance between the - * start and end of the iterator - */ - public static IntervalIterator widthFilter(IntervalIterator in, int minWidth, int maxWidth) { - return new IntervalFilter(in) { - @Override - protected boolean accept() { - int width = end() - start(); - return width >= minWidth && width <= maxWidth; - } - }; - } - - /** - * Filter an {@link IntervalIterator} by its inner width, ie the distance between the - * end of its first subiterator and the beginning of its last - */ - public static IntervalIterator innerWidthFilter(IntervalIterator in, int minWidth, int 
maxWidth) { - return new IntervalFilter(in) { - @Override - protected boolean accept() { - int width = innerWidth(); - return width >= minWidth && width <= maxWidth; - } - - @Override - public String toString() { - return "widthfilter(" + minWidth + "," + maxWidth + "," + in.toString() + ")"; - } - }; - } - /** * Create a new filter */ diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 384e4ac0b691..860815ab99c4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -42,73 +42,80 @@ public abstract class IntervalFunction { */ public abstract IntervalIterator apply(List iterators); - /** - * Return an iterator over intervals where the subiterators appear in a given order - */ - public static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { + public static final IntervalFunction BLOCK = new SingletonFunction("BLOCK") { @Override - public IntervalIterator apply(List intervalIterators) { - return orderedIntervalIterator(intervalIterators); + public IntervalIterator apply(List iterators) { + return new BlockIntervalIterator(iterators); } }; - /** - * Return an iterator over intervals where the subiterators appear in a given order, - * filtered by width - */ - public static class OrderedNearFunction extends IntervalFunction { - - /** - * Create a new OrderedNearFunction - * @param minWidth the minimum width of returned intervals - * @param maxWidth the maximum width of returned intervals - */ - public OrderedNearFunction(int minWidth, int maxWidth) { - this.minWidth = minWidth; - this.maxWidth = maxWidth; + private static class BlockIntervalIterator extends ConjunctionIntervalIterator { + + int start, end; + + BlockIntervalIterator(List subIterators) { + super(subIterators); } - final int minWidth; - final int maxWidth; + @Override + protected 
void reset() throws IOException { + start = end = -1; + } @Override - public IntervalIterator apply(List intervalIterators) { - return IntervalFilter.innerWidthFilter(orderedIntervalIterator(intervalIterators), minWidth, maxWidth); + public int start() { + return start; } @Override - public String toString() { - return "ONEAR[" + minWidth + "/" + maxWidth + "]"; + public int end() { + return end; } @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - OrderedNearFunction that = (OrderedNearFunction) o; - return minWidth == that.minWidth && - maxWidth == that.maxWidth; + public float score() { + return 1; } @Override - public int hashCode() { - return Objects.hash(minWidth, maxWidth); + public int nextInterval() throws IOException { + if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) + return NO_MORE_INTERVALS; + int i = 1; + while (i < subIterators.size()) { + while (subIterators.get(i).start() <= subIterators.get(i - 1).end()) { + if (subIterators.get(i).nextInterval() == NO_MORE_INTERVALS) + return NO_MORE_INTERVALS; + } + if (subIterators.get(i).start() == subIterators.get(i - 1).end() + 1) { + i = i + 1; + } + else { + if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) + return NO_MORE_INTERVALS; + i = 1; + } + } + start = subIterators.get(0).start(); + end = subIterators.get(subIterators.size() - 1).end(); + return start; } } - private static IntervalIterator orderedIntervalIterator(List subIterators) { - for (IntervalIterator it : subIterators) { - if (it == null) - return null; + /** + * Return an iterator over intervals where the subiterators appear in a given order + */ + public static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { + @Override + public IntervalIterator apply(List intervalIterators) { + return new OrderedIntervalIterator(intervalIterators); } - return new OrderedIntervalIterator(subIterators); - } + }; private 
static class OrderedIntervalIterator extends ConjunctionIntervalIterator { int start; int end; - int innerWidth; int i; private OrderedIntervalIterator(List subIntervals) { @@ -125,16 +132,11 @@ public int end() { return end; } - @Override - public int innerWidth() { - return innerWidth; - } - @Override public void reset() throws IOException { subIterators.get(0).nextInterval(); i = 1; - start = end = innerWidth = Integer.MIN_VALUE; + start = end = Integer.MIN_VALUE; } @Override @@ -157,10 +159,6 @@ public int nextInterval() throws IOException { start = subIterators.get(0).start(); end = subIterators.get(subIterators.size() - 1).end(); b = subIterators.get(subIterators.size() - 1).start(); - innerWidth = 0; - for (int j = 1; j < subIterators.size(); j++) { - innerWidth += subIterators.get(j).start() - subIterators.get(j - 1).end() - 1; - } i = 1; if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) return start; @@ -174,64 +172,16 @@ public int nextInterval() throws IOException { public static final IntervalFunction UNORDERED = new SingletonFunction("UNORDERED") { @Override public IntervalIterator apply(List intervalIterators) { - return unorderedIntervalIterator(intervalIterators); + return new UnorderedIntervalIterator(intervalIterators); } }; - /** - * An iterator over intervals where the subiterators appear in any order, within a given width range - */ - public static class UnorderedNearFunction extends IntervalFunction { - - final int minWidth; - final int maxWidth; - - /** - * Create a new UnorderedNearFunction - * @param minWidth the minimum width of the returned intervals - * @param maxWidth the maximum width of the returned intervals - */ - public UnorderedNearFunction(int minWidth, int maxWidth) { - this.minWidth = minWidth; - this.maxWidth = maxWidth; - } - - @Override - public IntervalIterator apply(List intervalIterators) { - return IntervalFilter.innerWidthFilter(unorderedIntervalIterator(intervalIterators), minWidth, maxWidth); - } - - 
@Override - public String toString() { - return "ONEAR[" + minWidth + "/" + maxWidth + "]"; - } - - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - UnorderedNearFunction that = (UnorderedNearFunction) o; - return minWidth == that.minWidth && - maxWidth == that.maxWidth; - } - - @Override - public int hashCode() { - return Objects.hash(minWidth, maxWidth); - } - } - - private static IntervalIterator unorderedIntervalIterator(List subIntervals) { - return new UnorderedIntervalIterator(subIntervals); - } - private static class UnorderedIntervalIterator extends ConjunctionIntervalIterator { private final PriorityQueue queue; private final IntervalIterator[] subIterators; - int start, end, innerStart, innerEnd, queueEnd; + int start, end, queueEnd; UnorderedIntervalIterator(List subIterators) { super(subIterators); @@ -258,21 +208,15 @@ public int end() { return end; } - @Override - public int innerWidth() { - return innerEnd - innerStart + 1; - } - @Override public void reset() throws IOException { this.queue.clear(); - this.queueEnd = start = end = innerEnd = innerStart = -1; + this.queueEnd = start = end = -1; for (IntervalIterator subIterator : subIterators) { subIterator.nextInterval(); queue.add(subIterator); if (subIterator.end() > queueEnd) { queueEnd = subIterator.end(); - innerEnd = subIterator.start(); } } } @@ -281,7 +225,6 @@ void updateRightExtreme(IntervalIterator it) { int itEnd = it.end(); if (itEnd > queueEnd) { queueEnd = itEnd; - innerEnd = it.start(); } } @@ -298,7 +241,6 @@ public int nextInterval() throws IOException { return NO_MORE_INTERVALS; do { start = queue.top().start(); - innerStart = queue.top().end(); end = queueEnd; if (queue.top().end() == end) return start; @@ -337,11 +279,6 @@ public int end() { return a.end(); } - @Override - public int innerWidth() { - return a.innerWidth(); - } - @Override public void reset() { bpos = true; @@ -389,11 
+326,6 @@ public int end() { return a.end(); } - @Override - public int innerWidth() { - return a.innerWidth(); - } - @Override public void reset() throws IOException { bpos = true; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index d33a47530031..9dc8a0cd9ab4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -45,11 +45,6 @@ public interface IntervalIterator { */ int end(); - /** - * The width of the current interval - */ - int innerWidth(); - /** * Advance the iterator to the next interval * @@ -62,7 +57,7 @@ public interface IntervalIterator { * The score of the current interval */ default float score() { - return (float) (1.0 / (1 + innerWidth())); + return (float) (1.0 / (end() - start() + 1)); } float cost(); diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index 1130d327613a..83a81b062a24 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -76,8 +76,11 @@ private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, float boost) termStats[termUpTo++] = termStatistics; } } + if (termUpTo == 0) { + return null; + } CollectionStatistics collectionStats = searcher.collectionStatistics(field); - return searcher.getSimilarity().scorer(boost, collectionStats, termStats); + return searcher.getSimilarity().scorer(boost, collectionStats, Arrays.copyOf(termStats, termUpTo)); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index 175b50762316..75989dc152c8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -17,8 +17,12 @@ package org.apache.lucene.search; +import java.io.IOException; import java.util.Arrays; +import java.util.Set; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; import org.apache.lucene.util.BytesRef; /** @@ -48,7 +52,11 @@ public static IntervalsSource phrase(String... terms) { sources[i] = term(term); i++; } - return orderedNear(0, sources); + return phrase(sources); + } + + public static IntervalsSource phrase(IntervalsSource... subSources) { + return new ConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.BLOCK); } public static IntervalsSource or(IntervalsSource... subSources) { @@ -57,33 +65,8 @@ public static IntervalsSource or(IntervalsSource... subSources) { return new DisjunctionIntervalsSource(Arrays.asList(subSources)); } - /** - * Create an ordered {@link IntervalsSource} with a maximum width - * - * Returns intervals in which the subsources all appear in the given order, and - * in which the width of the interval over which the subsources appear is less than - * the defined width - * - * @param width the maximum width of subquery-spanning intervals that will match - * @param subSources an ordered set of {@link IntervalsSource} objects - */ - public static IntervalsSource orderedNear(int width, IntervalsSource... 
subSources) { - return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.OrderedNearFunction(0, width)); - } - - /** - * Create an ordered {@link IntervalsSource} with a defined width range - * - * Returns intervals in which the subsources all appear in the given order, and in - * which the width of the interval over which the subsources appear is between the - * minimum and maximum defined widths - * - * @param minWidth the minimum width of subquery-spanning intervals that will match - * @param maxWidth the maximum width of subquery-spanning intervals that will match - * @param subSources an ordered set of {@link IntervalsSource} objects - */ - public static IntervalsSource orderedNear(int minWidth, int maxWidth, IntervalsSource... subSources) { - return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.OrderedNearFunction(minWidth, maxWidth)); + public static IntervalsSource maxwidth(int width, IntervalsSource subSource) { + return new LowpassIntervalsSource(subSource, width); } /** @@ -97,35 +80,6 @@ public static IntervalsSource ordered(IntervalsSource... subSources) { return new ConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.ORDERED); } - /** - * Create an unordered {@link IntervalsSource} with a maximum width - * - * Returns intervals in which the subsources all appear in any order, and in which - * the width of the interval over which the subsources appear is less than the - * defined width - * - * @param width the maximum width of subquery-spanning intervals that will match - * @param subSources an unordered set of queries - */ - public static IntervalsSource unorderedNear(int width, IntervalsSource... 
subSources) { - return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.UnorderedNearFunction(0, width)); - } - - /** - * Create an unordered {@link IntervalsSource} with a defined width range - * - * Returns intervals in which the subsources all appear in any order, and in which - * the width of the interval over which the subsources appear is between the minimum - * and maximum defined widths - * - * @param minWidth the minimum width of subquery-spanning intervals that will match - * @param maxWidth the maximum width of subquery-spanning intervals that will match - * @param subSources an unordered set of subsources - */ - public static IntervalsSource unorderedNear(int minWidth, int maxWidth, IntervalsSource... subSources) { - return new ConjunctionIntervalsSource(Arrays.asList(subSources), new IntervalFunction.UnorderedNearFunction(minWidth, maxWidth)); - } - /** * Create an unordered {@link IntervalsSource} with an unbounded width range * diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java index 3bdf1e50655d..bb0362171c5a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java @@ -18,6 +18,7 @@ package org.apache.lucene.search; import java.io.IOException; +import java.util.Objects; import java.util.Set; import org.apache.lucene.index.LeafReaderContext; @@ -37,4 +38,5 @@ public abstract class IntervalsSource { public abstract String toString(); public abstract void extractTerms(String field, Set terms); + } diff --git a/lucene/core/src/java/org/apache/lucene/search/LowpassIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/LowpassIntervalsSource.java new file mode 100644 index 000000000000..39f24fbfb670 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/LowpassIntervalsSource.java @@ -0,0 +1,71 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Objects; +import java.util.Set; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; + +class LowpassIntervalsSource extends IntervalsSource { + + final IntervalsSource in; + final int maxWidth; + + LowpassIntervalsSource(IntervalsSource in, int maxWidth) { + this.in = in; + this.maxWidth = maxWidth; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + LowpassIntervalsSource that = (LowpassIntervalsSource) o; + return maxWidth == that.maxWidth && + Objects.equals(in, that.in); + } + + @Override + public String toString() { + return "MAXWIDTH/" + maxWidth + "(" + in + ")"; + } + + @Override + public void extractTerms(String field, Set terms) { + in.extractTerms(field, terms); + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + IntervalIterator i = in.intervals(field, ctx); + return new IntervalFilter(i) { + @Override + protected boolean accept() { + return (i.end() - i.start()) + 1 <= maxWidth; + } + }; 
+ } + + @Override + public int hashCode() { + return Objects.hash(in, maxWidth); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java index c60af1273b3f..4f7650e0df3d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -77,11 +77,6 @@ public int end() { return pos; } - @Override - public int innerWidth() { - return 1; - } - @Override public int nextInterval() throws IOException { if (upto <= 0) @@ -90,6 +85,11 @@ public int nextInterval() throws IOException { return pos = pe.nextPosition(); } + @Override + public float score() { + return 1; + } + @Override public float cost() { return cost; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index e2ab2af7c972..6aa4f833af05 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -86,29 +86,29 @@ public void testScoring() throws IOException { } } - public void testOrderedNearQueryWidth0() throws IOException { - checkHits(new IntervalQuery(field, Intervals.orderedNear(0, Intervals.term("w1"), Intervals.term("w2"))), + public void testPhraseQuery() throws IOException { + checkHits(new IntervalQuery(field, Intervals.phrase(Intervals.term("w1"), Intervals.term("w2"))), new int[]{0}); } - public void testOrderedNearQueryWidth1() throws IOException { - checkHits(new IntervalQuery(field, Intervals.orderedNear(1, Intervals.term("w1"), Intervals.term("w2"))), + public void testOrderedNearQueryWidth3() throws IOException { + checkHits(new IntervalQuery(field, Intervals.maxwidth(3, Intervals.ordered(Intervals.term("w1"), Intervals.term("w2")))), new int[]{0, 1, 2, 5}); } - public void 
testOrderedNearQueryWidth2() throws IOException { - checkHits(new IntervalQuery(field, Intervals.orderedNear(2, Intervals.term("w1"), Intervals.term("w2"))), + public void testOrderedNearQueryWidth4() throws IOException { + checkHits(new IntervalQuery(field, Intervals.maxwidth(4, Intervals.ordered(Intervals.term("w1"), Intervals.term("w2")))), new int[]{0, 1, 2, 3, 5}); } public void testNestedOrderedNearQuery() throws IOException { // onear/1(w1, onear/2(w2, w3)) Query q = new IntervalQuery(field, - Intervals.orderedNear(1, + Intervals.ordered( Intervals.term("w1"), - Intervals.orderedNear(2, Intervals.term("w2"), Intervals.term("w3")))); + Intervals.maxwidth(3, Intervals.ordered(Intervals.term("w2"), Intervals.term("w3"))))); - checkHits(q, new int[]{0, 1, 2}); + checkHits(q, new int[]{0, 1, 3}); } public void testUnorderedQuery() throws IOException { @@ -160,7 +160,7 @@ public void testNotContainedByQuery() throws IOException { } public void testNestedOr() throws IOException { - Query q = new IntervalQuery(field, Intervals.orderedNear(0, + Query q = new IntervalQuery(field, Intervals.phrase( Intervals.term("coordinate"), Intervals.or(Intervals.phrase("genome", "mapping"), Intervals.term("genome")), Intervals.term("research"))); From 51224a68242f93425465d2353d49664db373e80e Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 7 Mar 2018 16:04:53 +0000 Subject: [PATCH 62/83] javadocs --- .../org/apache/lucene/search/Intervals.java | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index 75989dc152c8..84022db73cc5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -26,7 +26,7 @@ import org.apache.lucene.util.BytesRef; /** - * Constructor functions for interval-based queries + * Constructor functions for {@link 
IntervalsSource} types * * These queries use {@link IntervalFunction} or {@link DifferenceIntervalFunction} * classes, implementing minimum-interval algorithms taken from the paper @@ -37,14 +37,23 @@ public final class Intervals { private Intervals() {} + /** + * Return an {@link IntervalsSource} exposing intervals for a term + */ public static IntervalsSource term(BytesRef term) { return new TermIntervalsSource(term); } + /** + * Return an {@link IntervalsSource} exposing intervals for a term + */ public static IntervalsSource term(String term) { return new TermIntervalsSource(new BytesRef(term)); } + /** + * Return an {@link IntervalsSource} exposing intervals for a phrase consisting of a list of terms + */ public static IntervalsSource phrase(String... terms) { IntervalsSource[] sources = new IntervalsSource[terms.length]; int i = 0; @@ -55,16 +64,27 @@ public static IntervalsSource phrase(String... terms) { return phrase(sources); } + /** + * Return an {@link IntervalsSource} exposing intervals for a phrase consisting of a list of IntervalsSources + */ public static IntervalsSource phrase(IntervalsSource... subSources) { return new ConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.BLOCK); } + /** + * Return an {@link IntervalsSource} over the disjunction of a set of sub-sources + */ public static IntervalsSource or(IntervalsSource... 
subSources) { if (subSources.length == 1) return subSources[0]; return new DisjunctionIntervalsSource(Arrays.asList(subSources)); } + /** + * Create an {@link IntervalsSource} that filters a sub-source by the width of its intervals + * @param width the maximum width of intervals in the sub-source to return + * @param subSource the sub-source to filter + */ public static IntervalsSource maxwidth(int width, IntervalsSource subSource) { return new LowpassIntervalsSource(subSource, width); } From f3b15bb7e18f15f0bb7523b3623935301e9a26a9 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 7 Mar 2018 16:12:24 +0000 Subject: [PATCH 63/83] Field masking IntervalsSource --- .../org/apache/lucene/search/Intervals.java | 45 +++++++++++++++++++ .../apache/lucene/search/TestIntervals.java | 15 ++++++- 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index 84022db73cc5..5f15df2b94d1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.Arrays; +import java.util.Objects; import java.util.Set; import org.apache.lucene.index.LeafReaderContext; @@ -188,6 +189,50 @@ public static IntervalsSource containedBy(IntervalsSource small, IntervalsSource return new ConjunctionIntervalsSource(Arrays.asList(small, big), IntervalFunction.CONTAINED_BY); } + public static IntervalsSource mask(String field, IntervalsSource in) { + return new FieldMaskIntervalsSource(field, in); + } + + private static class FieldMaskIntervalsSource extends IntervalsSource { + + final String field; + final IntervalsSource in; + + private FieldMaskIntervalsSource(String field, IntervalsSource in) { + this.field = field; + this.in = in; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == 
null || getClass() != o.getClass()) return false; + FieldMaskIntervalsSource that = (FieldMaskIntervalsSource) o; + return Objects.equals(field, that.field) && + Objects.equals(in, that.in); + } + + @Override + public String toString() { + return "XFIELD/" + field + "(" + in + ")"; + } + + @Override + public void extractTerms(String field, Set terms) { + in.extractTerms(field, terms); + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + return in.intervals(this.field, ctx); + } + + @Override + public int hashCode() { + return Objects.hash(field, in); + } + } + // TODO: beforeQuery, afterQuery, arbitrary IntervalFunctions } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 5f5222ea41d6..0ee7defa3db7 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -87,7 +87,7 @@ public static void teardownIndex() throws IOException { private void checkIntervals(IntervalsSource source, String field, int expectedMatchCount, int[][] expected) throws IOException { int matchedDocs = 0; for (LeafReaderContext ctx : searcher.leafContexts) { - assertNull(source.intervals(field + "1", ctx)); + // assertNull(source.intervals(field + "1", ctx)); NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); IntervalIterator intervals = source.intervals(field, ctx); if (intervals == null) @@ -174,4 +174,17 @@ public void testNesting() throws IOException { }); } + public void testCrossFieldMasking() throws IOException { + checkIntervals(Intervals.ordered(Intervals.mask("field2", Intervals.term("xanadu")), Intervals.term("interest")), + "field1", 1, new int[][]{ + { 1, 2 }, + {}, + {}, + {}, + {}, + {}, + {} + }); + } + } From d14e86448f687275ce0d7ced5f72b32309ab9d41 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 8 Mar 2018 
10:35:29 +0000 Subject: [PATCH 64/83] javadocs, fix ORDERED contract --- .../lucene/search/IntervalFunction.java | 2 +- .../lucene/search/IntervalIterator.java | 22 ++++++++++++++++--- .../apache/lucene/search/TestIntervals.java | 17 ++++++++++++-- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 860815ab99c4..17cb5aa1d8ff 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -136,7 +136,7 @@ public int end() { public void reset() throws IOException { subIterators.get(0).nextInterval(); i = 1; - start = end = Integer.MIN_VALUE; + start = end = -1; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index 9dc8a0cd9ab4..1e5a1f80def7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -20,8 +20,8 @@ import java.io.IOException; /** - * Defines methods to iterate over the intervals that a {@link Scorer} matches - * on a document + * Defines methods to iterate over the intervals that a term, phrase or more + * complex positional query matches on a document */ public interface IntervalIterator { @@ -31,17 +31,28 @@ public interface IntervalIterator { */ int NO_MORE_INTERVALS = Integer.MAX_VALUE; + /** + * An iterator over documents that might have matching intervals + */ DocIdSetIterator approximation(); - boolean advanceTo(int doc) throws IOException; + /** + * Advances the iterator to {@code target}, returning {@code false} if there + * are definitely no matching intervals + */ + boolean advanceTo(int target) throws IOException; /** * The start of the current interval + * + * Returns -1 if {@link 
#nextInterval()} has not yet been called */ int start(); /** * The end of the current interval + * + * Returns -1 if {@link #nextInterval()} has not yet been called */ int end(); @@ -60,6 +71,11 @@ default float score() { return (float) (1.0 / (end() - start() + 1)); } + /** + * An indication of the cost of finding the next interval + * + * @see TwoPhaseIterator#matchCost() + */ float cost(); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 0ee7defa3db7..d63438043883 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -87,7 +87,7 @@ public static void teardownIndex() throws IOException { private void checkIntervals(IntervalsSource source, String field, int expectedMatchCount, int[][] expected) throws IOException { int matchedDocs = 0; for (LeafReaderContext ctx : searcher.leafContexts) { - // assertNull(source.intervals(field + "1", ctx)); + assertNull(source.intervals(field + "fake", ctx)); NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); IntervalIterator intervals = source.intervals(field, ctx); if (intervals == null) @@ -97,8 +97,10 @@ private void checkIntervals(IntervalsSource source, String field, int expectedMa int id = (int) ids.longValue(); if (intervals.advanceTo(doc)) { int i = 0, pos; + assertEquals(-1, intervals.start()); + assertEquals(-1, intervals.end()); while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) { - System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); + //System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); assertEquals(expected[id][i + 1], intervals.end()); @@ -139,6 +141,17 @@ public void testOrderedNearIntervals() throws IOException { }); } + public void 
testPhraseIntervals() throws IOException { + checkIntervals(Intervals.phrase("pease", "porridge"), "field1", 3, new int[][]{ + {}, + { 0, 1, 3, 4, 6, 7 }, + { 0, 1, 3, 4, 6, 7 }, + {}, + { 0, 1, 3, 4, 6, 7 }, + {} + }); + } + public void testUnorderedNearIntervals() throws IOException { checkIntervals(Intervals.unordered(Intervals.term("pease"), Intervals.term("hot")), "field1", 4, new int[][]{ From b468ea5ef95b1fe4bc4902c57787544ba924531a Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 8 Mar 2018 17:53:05 +0000 Subject: [PATCH 65/83] Simplify advanceTo(int) -> reset() --- .../search/ConjunctionIntervalIterator.java | 12 ---------- .../search/DifferenceIntervalFunction.java | 12 ++++++---- .../search/DisjunctionIntervalsSource.java | 23 +++++-------------- .../lucene/search/FilterIntervalIterator.java | 4 ++-- .../lucene/search/IntervalFunction.java | 15 ++++++++++-- .../lucene/search/IntervalIterator.java | 6 +---- .../apache/lucene/search/IntervalScorer.java | 3 ++- .../lucene/search/TermIntervalsSource.java | 11 ++------- .../apache/lucene/search/TestIntervals.java | 4 +++- 9 files changed, 37 insertions(+), 53 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java index f6457d2ffd75..0290bcf982f1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java @@ -46,18 +46,6 @@ public final DocIdSetIterator approximation() { return approximation; } - @Override - public final boolean advanceTo(int doc) throws IOException { - for (IntervalIterator it : subIterators) { - if (it.advanceTo(doc) == false) - return false; - } - reset(); - return true; - } - - protected abstract void reset() throws IOException; - @Override public final float cost() { return cost; diff --git 
a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index e5310f9482fd..fca1562beb49 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -96,11 +96,15 @@ public int end() { } @Override - public boolean advanceTo(int doc) throws IOException { - bpos = b.advanceTo(doc); - if (bpos) + public void reset() throws IOException { + int doc = a.approximation().docID(); + bpos = b.approximation().docID() == doc || + (b.approximation().docID() < doc && b.approximation().advance(doc) == doc); + if (bpos) { + b.reset(); bpos = b.nextInterval() != NO_MORE_INTERVALS; - return a.advanceTo(doc); + } + a.reset(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java index 1327c131d894..4308e5065c5b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -127,21 +127,14 @@ public int end() { } @Override - public boolean advanceTo(int doc) throws IOException { + public void reset() throws IOException { intervalQueue.clear(); - int approxDoc = this.approximation.docID(); - if (approxDoc > doc || (approxDoc != doc && this.approximation.advance(doc) != doc)) { - return false; - } for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { - IntervalIterator it = dw.intervals; - if (it.advanceTo(doc)) { - it.nextInterval(); - intervalQueue.add(it); - } + dw.intervals.reset(); + dw.intervals.nextInterval(); + intervalQueue.add(dw.intervals); } current = UNPOSITIONED; - return intervalQueue.size() > 0; } @Override @@ -183,9 +176,7 @@ public DocIdSetIterator approximation() { } @Override - public boolean 
advanceTo(int doc) throws IOException { - return false; - } + public void reset() throws IOException { } @Override public int start() { @@ -215,9 +206,7 @@ public DocIdSetIterator approximation() { } @Override - public boolean advanceTo(int doc) throws IOException { - return false; - } + public void reset() throws IOException { } @Override public int start() { diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java index bb4d736fe3ec..ddb2aa358e09 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java @@ -43,8 +43,8 @@ public DocIdSetIterator approximation() { } @Override - public boolean advanceTo(int doc) throws IOException { - return in.advanceTo(doc); + public void reset() throws IOException { + in.reset(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 17cb5aa1d8ff..c3e985a6332a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -58,7 +58,10 @@ private static class BlockIntervalIterator extends ConjunctionIntervalIterator { } @Override - protected void reset() throws IOException { + public void reset() throws IOException { + for (IntervalIterator it : subIterators) { + it.reset(); + } start = end = -1; } @@ -134,6 +137,9 @@ public int end() { @Override public void reset() throws IOException { + for (IntervalIterator it : subIterators) { + it.reset(); + } subIterators.get(0).nextInterval(); i = 1; start = end = -1; @@ -213,6 +219,7 @@ public void reset() throws IOException { this.queue.clear(); this.queueEnd = start = end = -1; for (IntervalIterator subIterator : subIterators) { + subIterator.reset(); 
subIterator.nextInterval(); queue.add(subIterator); if (subIterator.end() > queueEnd) { @@ -280,7 +287,9 @@ public int end() { } @Override - public void reset() { + public void reset() throws IOException { + a.reset(); + b.reset(); bpos = true; } @@ -328,6 +337,8 @@ public int end() { @Override public void reset() throws IOException { + a.reset(); + b.reset(); bpos = true; } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index 1e5a1f80def7..a14609474615 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -36,11 +36,7 @@ public interface IntervalIterator { */ DocIdSetIterator approximation(); - /** - * Advances the iterator to {@code target}, returning {@code false} if there - * are definitely no matching intervals - */ - boolean advanceTo(int target) throws IOException; + void reset() throws IOException; /** * The start of the current interval diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index d70e93d7705c..b0b8fc24a496 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -81,7 +81,8 @@ public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { - return intervals.advanceTo(docID()) && intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; + intervals.reset(); + return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java index 4f7650e0df3d..95e772ca142b 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -55,16 +55,9 @@ public DocIdSetIterator approximation() { } @Override - public boolean advanceTo(int doc) throws IOException { + public void reset() throws IOException { pos = -1; - if (pe.docID() > doc || (pe.docID() != doc && pe.advance(doc) != doc)) { - upto = -1; - return false; - } - else { - upto = pe.freq(); - return true; - } + upto = pe.freq(); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index d63438043883..bc89efc70f63 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -95,7 +95,9 @@ private void checkIntervals(IntervalsSource source, String field, int expectedMa for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { ids.advance(doc); int id = (int) ids.longValue(); - if (intervals.advanceTo(doc)) { + if (intervals.approximation().docID() == doc || + (intervals.approximation().docID() < doc && intervals.approximation().advance(doc) == doc)) { + intervals.reset(); int i = 0, pos; assertEquals(-1, intervals.start()); assertEquals(-1, intervals.end()); From 0a6eff5a3bd3951d41e3e1dc0cab163b53c713d5 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 9 Mar 2018 10:05:23 +0000 Subject: [PATCH 66/83] IntervalFunction and DifferenceIntervalFunction are package-private --- .../lucene/search/DifferenceIntervalFunction.java | 10 +++++----- .../org/apache/lucene/search/IntervalFunction.java | 12 ++++++------ .../src/java/org/apache/lucene/search/Intervals.java | 3 +-- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index 
fca1562beb49..0b93ffb573f7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -24,7 +24,7 @@ * A function that takes two interval iterators and combines them to produce a third, * generally by computing a difference interval between them */ -public abstract class DifferenceIntervalFunction { +abstract class DifferenceIntervalFunction { @Override public abstract int hashCode(); @@ -44,7 +44,7 @@ public abstract class DifferenceIntervalFunction { * Filters the minuend iterator so that only intervals that do not overlap intervals from the * subtrahend iterator are returned */ - public static final DifferenceIntervalFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { + static final DifferenceIntervalFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { return new NonOverlappingIterator(minuend, subtrahend); @@ -55,7 +55,7 @@ public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrah * Filters the minuend iterator so that only intervals that do not contain intervals from the * subtrahend iterator are returned */ - public static final DifferenceIntervalFunction NOT_CONTAINING = new SingletonFunction("NOT_CONTAINING") { + static final DifferenceIntervalFunction NOT_CONTAINING = new SingletonFunction("NOT_CONTAINING") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { return new NotContainingIterator(minuend, subtrahend); @@ -66,7 +66,7 @@ public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrah * Filters the minuend iterator so that only intervals that are not contained by intervals from * the subtrahend iterator are returned */ - public static final DifferenceIntervalFunction NOT_CONTAINED_BY = new 
SingletonFunction("NOT_CONTAINED_BY") { + static final DifferenceIntervalFunction NOT_CONTAINED_BY = new SingletonFunction("NOT_CONTAINED_BY") { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { return new NotContainedByIterator(minuend, subtrahend); @@ -146,7 +146,7 @@ public int nextInterval() throws IOException { * Filters the minuend iterator so that only intervals that do not occur within a set number * of positions of intervals from the subtrahend iterator are returned */ - public static class NotWithinFunction extends DifferenceIntervalFunction { + static class NotWithinFunction extends DifferenceIntervalFunction { private final int positions; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index c3e985a6332a..d715d635c203 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -26,7 +26,7 @@ /** * Combine a list of {@link IntervalIterator}s into another */ -public abstract class IntervalFunction { +abstract class IntervalFunction { @Override public abstract int hashCode(); @@ -42,7 +42,7 @@ public abstract class IntervalFunction { */ public abstract IntervalIterator apply(List iterators); - public static final IntervalFunction BLOCK = new SingletonFunction("BLOCK") { + static final IntervalFunction BLOCK = new SingletonFunction("BLOCK") { @Override public IntervalIterator apply(List iterators) { return new BlockIntervalIterator(iterators); @@ -108,7 +108,7 @@ public int nextInterval() throws IOException { /** * Return an iterator over intervals where the subiterators appear in a given order */ - public static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { + static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { @Override public IntervalIterator apply(List 
intervalIterators) { return new OrderedIntervalIterator(intervalIterators); @@ -175,7 +175,7 @@ public int nextInterval() throws IOException { /** * Return an iterator over intervals where the subiterators appear in any order */ - public static final IntervalFunction UNORDERED = new SingletonFunction("UNORDERED") { + static final IntervalFunction UNORDERED = new SingletonFunction("UNORDERED") { @Override public IntervalIterator apply(List intervalIterators) { return new UnorderedIntervalIterator(intervalIterators); @@ -265,7 +265,7 @@ public int nextInterval() throws IOException { /** * Returns an interval over iterators where the first iterator contains intervals from the second */ - public static final IntervalFunction CONTAINING = new SingletonFunction("CONTAINING") { + static final IntervalFunction CONTAINING = new SingletonFunction("CONTAINING") { @Override public IntervalIterator apply(List iterators) { if (iterators.size() != 2) @@ -314,7 +314,7 @@ public int nextInterval() throws IOException { /** * Return an iterator over intervals where the first iterator is contained by intervals from the second */ - public static final IntervalFunction CONTAINED_BY = new SingletonFunction("CONTAINED_BY") { + static final IntervalFunction CONTAINED_BY = new SingletonFunction("CONTAINED_BY") { @Override public IntervalIterator apply(List iterators) { if (iterators.size() != 2) diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index 5f15df2b94d1..f7531fc75def 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -29,8 +29,7 @@ /** * Constructor functions for {@link IntervalsSource} types * - * These queries use {@link IntervalFunction} or {@link DifferenceIntervalFunction} - * classes, implementing minimum-interval algorithms taken from the paper + * These sources implement minimum-interval 
algorithms taken from the paper * * Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics */ From 1de09321b4a9a486c5b995eb322f2b8c12aca8e1 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 9 Mar 2018 11:51:48 +0000 Subject: [PATCH 67/83] feedback --- .../lucene/search/IntervalIterator.java | 11 ++++- .../org/apache/lucene/search/Intervals.java | 44 ------------------- .../apache/lucene/search/IntervalsSource.java | 19 +++++++- .../lucene/search/TermIntervalsSource.java | 3 ++ .../apache/lucene/search/TestIntervals.java | 22 ++++------ 5 files changed, 39 insertions(+), 60 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index a14609474615..740ba97e7966 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -22,6 +22,11 @@ /** * Defines methods to iterate over the intervals that a term, phrase or more * complex positional query matches on a document + * + * The iterator is advanced by calling {@link DocIdSetIterator#advance(int)} on the + * DocIdSetIterator returned by {@link #approximation()}. Consumers should then call + * {@link #reset()}, and then {@link #nextInterval()} to retrieve intervals until + * {@link #NO_MORE_INTERVALS} is returned. */ public interface IntervalIterator { @@ -36,6 +41,10 @@ public interface IntervalIterator { */ DocIdSetIterator approximation(); + /** + * Prepare to iterate over the intervals in a document after the approximation + * {@link DocIdSetIterator} has been advanced. 
+ */ void reset() throws IOException; /** @@ -68,7 +77,7 @@ default float score() { } /** - * An indication of the cost of finding the next interval + * An indication of the average cost of iterating over all intervals in a document * * @see TwoPhaseIterator#matchCost() */ diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/core/src/java/org/apache/lucene/search/Intervals.java index f7531fc75def..aa29fa2f4b99 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/core/src/java/org/apache/lucene/search/Intervals.java @@ -188,50 +188,6 @@ public static IntervalsSource containedBy(IntervalsSource small, IntervalsSource return new ConjunctionIntervalsSource(Arrays.asList(small, big), IntervalFunction.CONTAINED_BY); } - public static IntervalsSource mask(String field, IntervalsSource in) { - return new FieldMaskIntervalsSource(field, in); - } - - private static class FieldMaskIntervalsSource extends IntervalsSource { - - final String field; - final IntervalsSource in; - - private FieldMaskIntervalsSource(String field, IntervalsSource in) { - this.field = field; - this.in = in; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - FieldMaskIntervalsSource that = (FieldMaskIntervalsSource) o; - return Objects.equals(field, that.field) && - Objects.equals(in, that.in); - } - - @Override - public String toString() { - return "XFIELD/" + field + "(" + in + ")"; - } - - @Override - public void extractTerms(String field, Set terms) { - in.extractTerms(field, terms); - } - - @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { - return in.intervals(this.field, ctx); - } - - @Override - public int hashCode() { - return Objects.hash(field, in); - } - } - // TODO: beforeQuery, afterQuery, arbitrary IntervalFunctions } diff --git 
a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java index bb0362171c5a..86905020f76c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java @@ -24,10 +24,27 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; +/** + * A helper class for {@link IntervalQuery} that provides an {@link IntervalIterator} + * for a given field and segment + */ public abstract class IntervalsSource { + /** + * Create an {@link IntervalIterator} exposing the minimum intervals defined by this {@link IntervalsSource} + * + * @param field the field to read positions from + * @param ctx the context for which to return the iterator + */ public abstract IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException; + /** + * Expert: collect {@link Term} objects from this source, to be used for top-level term scoring + * @param field the field to be scored + * @param terms a {@link Set} which terms should be added to + */ + public abstract void extractTerms(String field, Set terms); + @Override public abstract int hashCode(); @@ -37,6 +54,4 @@ public abstract class IntervalsSource { @Override public abstract String toString(); - public abstract void extractTerms(String field, Set terms); - } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java index 95e772ca142b..25139bb86ca3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -41,6 +41,9 @@ public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IO Terms terms = ctx.reader().terms(field); if (terms == null) return null; + if (terms.hasPositions() == false) { + throw new 
IllegalArgumentException("Cannot create an IntervalIterator over field " + field + " because it has no indexed positions"); + } TermsEnum te = terms.iterator(); te.seekExact(term); PostingsEnum pe = te.postings(null, PostingsEnum.POSITIONS); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index bc89efc70f63..92cd8483b970 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -24,6 +24,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocValues; @@ -72,6 +73,7 @@ public static void setupIndex() throws IOException { Document doc = new Document(); doc.add(new TextField("field1", field1_docs[i], Field.Store.NO)); doc.add(new TextField("field2", field2_docs[i], Field.Store.NO)); + doc.add(new StringField("id", Integer.toString(i), Field.Store.NO)); doc.add(new NumericDocValuesField("id", i)); writer.addDocument(doc); } @@ -120,6 +122,13 @@ private void checkIntervals(IntervalsSource source, String field, int expectedMa assertEquals(expectedMatchCount, matchedDocs); } + public void testIntervalsOnFieldWithNoPositions() throws IOException { + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> { + Intervals.term("wibble").intervals("id", searcher.leafContexts.get(0)); + }); + assertEquals("Cannot create an IntervalIterator over field id because it has no indexed positions", e.getMessage()); + } + public void testTermQueryIntervals() throws IOException { checkIntervals(Intervals.term("porridge"), "field1", 4, new int[][]{ {}, @@ -189,17 +198,4 @@ public void testNesting() throws IOException 
{ }); } - public void testCrossFieldMasking() throws IOException { - checkIntervals(Intervals.ordered(Intervals.mask("field2", Intervals.term("xanadu")), Intervals.term("interest")), - "field1", 1, new int[][]{ - { 1, 2 }, - {}, - {}, - {}, - {}, - {}, - {} - }); - } - } From 847d95fd2053cfa966b144469ef655ec206acbb2 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 9 Mar 2018 17:05:46 +0000 Subject: [PATCH 68/83] Remove specialised score() method on IntervalIterator --- .../lucene/search/FilterIntervalIterator.java | 5 ----- .../org/apache/lucene/search/IntervalFunction.java | 5 ----- .../org/apache/lucene/search/IntervalIterator.java | 7 ------- .../org/apache/lucene/search/IntervalScorer.java | 2 +- .../apache/lucene/search/TermIntervalsSource.java | 5 ----- .../org/apache/lucene/search/TestIntervalQuery.java | 13 ------------- 6 files changed, 1 insertion(+), 36 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java index ddb2aa358e09..45b9870782d2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java @@ -52,11 +52,6 @@ public int nextInterval() throws IOException { return in.nextInterval(); } - @Override - public float score() { - return in.score(); - } - @Override public float cost() { return in.cost(); diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index d715d635c203..d2eee57a6f26 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -75,11 +75,6 @@ public int end() { return end; } - @Override - public float score() { - return 1; - } - @Override public int nextInterval() throws IOException { if 
(subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index 740ba97e7966..d5911accbe4d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -69,13 +69,6 @@ public interface IntervalIterator { */ int nextInterval() throws IOException; - /** - * The score of the current interval - */ - default float score() { - return (float) (1.0 / (end() - start() + 1)); - } - /** * An indication of the average cost of iterating over all intervals in a document * diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index b0b8fc24a496..c1b73b684bea 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -65,7 +65,7 @@ private void ensureFreq() throws IOException { lastScoredDoc = docID(); freq = 0; do { - freq += intervals.score(); + freq += (1.0 / (intervals.end() - intervals.start() + 1)); } while (intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS); } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java index 25139bb86ca3..224563943088 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -81,11 +81,6 @@ public int nextInterval() throws IOException { return pos = pe.nextPosition(); } - @Override - public float score() { - return 1; - } - @Override public float cost() { return cost; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java 
b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java index 6aa4f833af05..6c1ba8ff6c1b 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java @@ -73,19 +73,6 @@ private void checkHits(Query query, int[] results) throws IOException { CheckHits.checkHits(random(), query, field, searcher, results); } - public void testScoring() throws IOException { - PhraseQuery pq = new PhraseQuery.Builder().add(new Term(field, "w2")).add(new Term(field, "w3")).build(); - Query equiv = new IntervalQuery(field, Intervals.phrase("w2", "w3")); - - TopDocs td1 = searcher.search(pq, 10); - TopDocs td2 = searcher.search(equiv, 10); - assertEquals(td1.totalHits, td2.totalHits); - for (int i = 0; i < td1.scoreDocs.length; i++) { - assertEquals(td1.scoreDocs[i].doc, td2.scoreDocs[i].doc); - assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 0f); - } - } - public void testPhraseQuery() throws IOException { checkHits(new IntervalQuery(field, Intervals.phrase(Intervals.term("w1"), Intervals.term("w2"))), new int[]{0}); From b3afcc1efb07b793bfce742a0200982337240a32 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 9 Mar 2018 17:58:29 +0000 Subject: [PATCH 69/83] Remove reset() and track via the approximation docid --- .../search/DifferenceIntervalFunction.java | 19 ++-- .../search/DisjunctionIntervalsSource.java | 58 ++-------- .../lucene/search/FilterIntervalIterator.java | 5 - .../lucene/search/IntervalFunction.java | 102 +++++++++--------- .../lucene/search/IntervalIterator.java | 6 -- .../apache/lucene/search/IntervalScorer.java | 1 - .../lucene/search/TermIntervalsSource.java | 18 ++-- .../apache/lucene/search/TestIntervals.java | 1 - 8 files changed, 83 insertions(+), 127 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index 
0b93ffb573f7..c6e7ff64688f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -79,6 +79,7 @@ private static abstract class RelativeIterator implements IntervalIterator { final IntervalIterator b; boolean bpos; + int doc = -1; RelativeIterator(IntervalIterator a, IntervalIterator b) { this.a = a; @@ -95,16 +96,13 @@ public int end() { return a.end(); } - @Override - public void reset() throws IOException { - int doc = a.approximation().docID(); - bpos = b.approximation().docID() == doc || - (b.approximation().docID() < doc && b.approximation().advance(doc) == doc); - if (bpos) { - b.reset(); - bpos = b.nextInterval() != NO_MORE_INTERVALS; + protected void checkDoc() throws IOException { + if (doc != a.approximation().docID()) { + doc = a.approximation().docID(); + bpos = (b.approximation().docID() == doc || + (b.approximation().docID() < doc && b.approximation().advance(doc) == doc)) && + b.nextInterval() != NO_MORE_INTERVALS; } - a.reset(); } @Override @@ -126,6 +124,7 @@ private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtra @Override public int nextInterval() throws IOException { + checkDoc(); if (bpos == false) return a.nextInterval(); while (a.nextInterval() != NO_MORE_INTERVALS) { @@ -203,6 +202,7 @@ private NotContainingIterator(IntervalIterator minuend, IntervalIterator subtrah @Override public int nextInterval() throws IOException { + checkDoc(); if (bpos == false) return a.nextInterval(); while (a.nextInterval() != NO_MORE_INTERVALS) { @@ -228,6 +228,7 @@ private static class NotContainedByIterator extends RelativeIterator { @Override public int nextInterval() throws IOException { + checkDoc(); if (bpos == false) return a.nextInterval(); while (a.nextInterval() != NO_MORE_INTERVALS) { diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java 
b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java index 4308e5065c5b..3ba5538eff82 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -83,7 +83,8 @@ private static class DisjunctionIntervalIterator implements IntervalIterator { final List iterators; final float matchCost; - IntervalIterator current; + IntervalIterator current = EMPTY; + int doc = -1; DisjunctionIntervalIterator(List iterators) { this.iterators = iterators; @@ -126,20 +127,15 @@ public int end() { return current.end(); } - @Override - public void reset() throws IOException { - intervalQueue.clear(); - for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { - dw.intervals.reset(); - dw.intervals.nextInterval(); - intervalQueue.add(dw.intervals); - } - current = UNPOSITIONED; - } - @Override public int nextInterval() throws IOException { - if (current == UNPOSITIONED) { + if (doc != approximation.docID()) { + doc = approximation.docID(); + intervalQueue.clear(); + for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { + dw.intervals.nextInterval(); + intervalQueue.add(dw.intervals); + } current = intervalQueue.top(); return current.start(); } @@ -175,39 +171,6 @@ public DocIdSetIterator approximation() { throw new UnsupportedOperationException(); } - @Override - public void reset() throws IOException { } - - @Override - public int start() { - return NO_MORE_INTERVALS; - } - - @Override - public int end() { - return NO_MORE_INTERVALS; - } - - @Override - public int nextInterval() throws IOException { - return NO_MORE_INTERVALS; - } - - @Override - public float cost() { - return 0; - } - }; - - private static final IntervalIterator UNPOSITIONED = new IntervalIterator() { - @Override - public DocIdSetIterator approximation() { - throw new UnsupportedOperationException(); - } - - @Override - public void reset() throws 
IOException { } - @Override public int start() { return -1; @@ -219,7 +182,7 @@ public int end() { } @Override - public int nextInterval() throws IOException { + public int nextInterval() { return NO_MORE_INTERVALS; } @@ -228,4 +191,5 @@ public float cost() { return 0; } }; + } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java index 45b9870782d2..cf1843a1e6d0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java @@ -42,11 +42,6 @@ public DocIdSetIterator approximation() { return in.approximation(); } - @Override - public void reset() throws IOException { - in.reset(); - } - @Override public int nextInterval() throws IOException { return in.nextInterval(); diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index d2eee57a6f26..5df8c85031cd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -51,32 +51,34 @@ public IntervalIterator apply(List iterators) { private static class BlockIntervalIterator extends ConjunctionIntervalIterator { - int start, end; + int doc = -1, start = -1, end = -1; BlockIntervalIterator(List subIterators) { super(subIterators); } - @Override - public void reset() throws IOException { - for (IntervalIterator it : subIterators) { - it.reset(); - } - start = end = -1; - } - @Override public int start() { + if (doc != approximation.docID()) { + return -1; + } return start; } @Override public int end() { + if (doc != approximation.docID()) { + return -1; + } return end; } @Override public int nextInterval() throws IOException { + if (doc != approximation.docID()) { + doc = approximation.docID(); + start = end = -1; + } if 
(subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) return NO_MORE_INTERVALS; int i = 1; @@ -112,9 +114,7 @@ public IntervalIterator apply(List intervalIterators) { private static class OrderedIntervalIterator extends ConjunctionIntervalIterator { - int start; - int end; - int i; + int doc = -1, start = -1, end = -1, i; private OrderedIntervalIterator(List subIntervals) { super(subIntervals); @@ -122,26 +122,28 @@ private OrderedIntervalIterator(List subIntervals) { @Override public int start() { + if (doc != approximation.docID()) { + return -1; + } return start; } @Override public int end() { - return end; - } - - @Override - public void reset() throws IOException { - for (IntervalIterator it : subIterators) { - it.reset(); + if (doc != approximation.docID()) { + return -1; } - subIterators.get(0).nextInterval(); - i = 1; - start = end = -1; + return end; } @Override public int nextInterval() throws IOException { + if (doc != approximation.docID()) { + doc = approximation.docID(); + subIterators.get(0).nextInterval(); + i = 1; + start = end = -1; + } start = end = NO_MORE_INTERVALS; int b = Integer.MAX_VALUE; while (true) { @@ -182,7 +184,7 @@ private static class UnorderedIntervalIterator extends ConjunctionIntervalIterat private final PriorityQueue queue; private final IntervalIterator[] subIterators; - int start, end, queueEnd; + int doc = -1, start = -1, end = -1, queueEnd; UnorderedIntervalIterator(List subIterators) { super(subIterators); @@ -201,26 +203,18 @@ protected boolean lessThan(IntervalIterator a, IntervalIterator b) { @Override public int start() { + if (doc != approximation.docID()) { + return -1; + } return start; } @Override public int end() { - return end; - } - - @Override - public void reset() throws IOException { - this.queue.clear(); - this.queueEnd = start = end = -1; - for (IntervalIterator subIterator : subIterators) { - subIterator.reset(); - subIterator.nextInterval(); - queue.add(subIterator); - if (subIterator.end() > 
queueEnd) { - queueEnd = subIterator.end(); - } + if (doc != approximation.docID()) { + return -1; } + return end; } void updateRightExtreme(IntervalIterator it) { @@ -232,6 +226,16 @@ void updateRightExtreme(IntervalIterator it) { @Override public int nextInterval() throws IOException { + if (doc != approximation.docID()) { + doc = approximation.docID(); + this.queue.clear(); + this.queueEnd = start = end = -1; + for (IntervalIterator it : subIterators) { + it.nextInterval(); + queue.add(it); + updateRightExtreme(it); + } + } while (this.queue.size() == subIterators.length && queue.top().start() == start) { IntervalIterator it = queue.pop(); if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { @@ -270,6 +274,7 @@ public IntervalIterator apply(List iterators) { return new ConjunctionIntervalIterator(iterators) { boolean bpos; + int doc = -1; @Override public int start() { @@ -281,15 +286,12 @@ public int end() { return a.end(); } - @Override - public void reset() throws IOException { - a.reset(); - b.reset(); - bpos = true; - } - @Override public int nextInterval() throws IOException { + if (doc != approximation.docID()) { + doc = approximation.docID(); + bpos = true; + } if (bpos == false) return NO_MORE_INTERVALS; while (a.nextInterval() != NO_MORE_INTERVALS) { @@ -319,6 +321,7 @@ public IntervalIterator apply(List iterators) { return new ConjunctionIntervalIterator(iterators) { boolean bpos; + int doc = -1; @Override public int start() { @@ -330,15 +333,12 @@ public int end() { return a.end(); } - @Override - public void reset() throws IOException { - a.reset(); - b.reset(); - bpos = true; - } - @Override public int nextInterval() throws IOException { + if (doc != approximation.docID()) { + doc = approximation.docID(); + bpos = true; + } if (bpos == false) return NO_MORE_INTERVALS; while (a.nextInterval() != NO_MORE_INTERVALS) { diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java 
b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index d5911accbe4d..c41888b91e87 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -41,12 +41,6 @@ public interface IntervalIterator { */ DocIdSetIterator approximation(); - /** - * Prepare to iterate over the intervals in a document after the approximation - * {@link DocIdSetIterator} has been advanced. - */ - void reset() throws IOException; - /** * The start of the current interval * diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index c1b73b684bea..279265f3dbff 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -81,7 +81,6 @@ public TwoPhaseIterator twoPhaseIterator() { return new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { - intervals.reset(); return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java index 224563943088..a9eb01e3937c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -50,31 +50,35 @@ public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IO float cost = PhraseQuery.termPositionsCost(te); return new IntervalIterator() { - int pos, upto; + int doc = -1, pos = -1, upto; @Override public DocIdSetIterator approximation() { return pe; } - @Override - public void reset() throws IOException { - pos = -1; - upto = pe.freq(); - } - @Override public int start() { + if (doc != pe.docID()) { + return -1; + } return pos; } @Override 
public int end() { + if (doc != pe.docID()) { + return -1; + } return pos; } @Override public int nextInterval() throws IOException { + if (doc != pe.docID()) { + doc = pe.docID(); + upto = pe.freq(); + } if (upto <= 0) return pos = NO_MORE_INTERVALS; upto--; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index 92cd8483b970..ec28cef73b2d 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -99,7 +99,6 @@ private void checkIntervals(IntervalsSource source, String field, int expectedMa int id = (int) ids.longValue(); if (intervals.approximation().docID() == doc || (intervals.approximation().docID() < doc && intervals.approximation().advance(doc) == doc)) { - intervals.reset(); int i = 0, pos; assertEquals(-1, intervals.start()); assertEquals(-1, intervals.end()); From fc31c54d0cbf7fb8bf648574b4053471fbde560a Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 9 Mar 2018 20:53:19 +0000 Subject: [PATCH 70/83] javadocs --- .../apache/lucene/search/DisjunctionIntervalsSource.java | 6 ++++++ .../src/java/org/apache/lucene/search/IntervalIterator.java | 3 +-- .../src/java/org/apache/lucene/search/IntervalsSource.java | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java index 3ba5538eff82..b2b5840bb335 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -119,11 +119,17 @@ public float cost() { @Override public int start() { + if (doc != approximation.docID()) { + return -1; + } return current.start(); } @Override public int end() { + if (doc != approximation.docID()) { + return -1; 
+ } return current.end(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index c41888b91e87..7dea05990ae3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -25,8 +25,7 @@ * * The iterator is advanced by calling {@link DocIdSetIterator#advance(int)} on the * DocIdSetIterator returned by {@link #approximation()}. Consumers should then call - * {@link #reset()}, and then {@link #nextInterval()} to retrieve intervals until - * {@link #NO_MORE_INTERVALS} is returned. + * {@link #nextInterval()} to retrieve intervals until {@link #NO_MORE_INTERVALS} is returned. */ public interface IntervalIterator { diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java index 86905020f76c..e1d2fe1598cb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java @@ -33,6 +33,8 @@ public abstract class IntervalsSource { /** * Create an {@link IntervalIterator} exposing the minimum intervals defined by this {@link IntervalsSource} * + * Returns {@code null} if no intervals for this field exist in this segment + * * @param field the field to read positions from * @param ctx the context for which to return the iterator */ From 64c91e6947b45f9bf75b3bb145abe809795efa25 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Sun, 11 Mar 2018 20:17:58 +0000 Subject: [PATCH 71/83] Make IntervalIterator a DISI --- .../apache/lucene/search/ConjunctionDISI.java | 2 +- .../search/ConjunctionIntervalIterator.java | 23 ++--- .../search/DifferenceIntervalFunction.java | 60 ++++++++----- .../org/apache/lucene/search/DisiWrapper.java | 8 +- .../search/DisjunctionIntervalsSource.java | 59 +++++++------ 
.../lucene/search/FilterIntervalIterator.java | 54 ------------ .../apache/lucene/search/IntervalFilter.java | 27 +++++- .../lucene/search/IntervalFunction.java | 88 ++++++++----------- .../lucene/search/IntervalIterator.java | 61 ++++++++++--- .../apache/lucene/search/IntervalScorer.java | 6 +- .../lucene/search/TermIntervalsSource.java | 33 ++++--- .../apache/lucene/search/TestIntervals.java | 6 +- 12 files changed, 208 insertions(+), 219 deletions(-) delete mode 100644 lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java index 780e854033a8..8ed42316a09f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java @@ -58,7 +58,7 @@ public static DocIdSetIterator intersectScorers(Collection scorers) { * returned {@link DocIdSetIterator} might leverage two-phase iteration in * which case it is possible to retrieve the {@link TwoPhaseIterator} using * {@link TwoPhaseIterator#unwrap}. 
*/ - public static DocIdSetIterator intersectIterators(List iterators) { + public static DocIdSetIterator intersectIterators(List iterators) { if (iterators.size() < 2) { throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java index 0290bcf982f1..cfda0de03f50 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java @@ -21,38 +21,25 @@ import java.util.ArrayList; import java.util.List; -abstract class ConjunctionIntervalIterator implements IntervalIterator { +abstract class ConjunctionIntervalIterator extends IntervalIterator { - protected final List subIterators; + final List subIterators; - final DocIdSetIterator approximation; final float cost; ConjunctionIntervalIterator(List subIterators) { + super(ConjunctionDISI.intersectIterators(subIterators)); this.subIterators = subIterators; float costsum = 0; - List approximations = new ArrayList<>(); for (IntervalIterator it : subIterators) { - costsum += it.cost(); - approximations.add(it.approximation()); + costsum += it.matchCost(); } this.cost = costsum; - this.approximation = ConjunctionDISI.intersectIterators(approximations); - - } - - @Override - public final DocIdSetIterator approximation() { - return approximation; } @Override - public final float cost() { + public final float matchCost() { return cost; } - @Override - public String toString() { - return approximation.docID() + ":[" + start() + "->" + end() + "]"; - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java index c6e7ff64688f..bc89bfed70b3 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java @@ -73,19 +73,26 @@ public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrah } }; - private static abstract class RelativeIterator implements IntervalIterator { + private static abstract class RelativeIterator extends IntervalIterator { final IntervalIterator a; final IntervalIterator b; boolean bpos; - int doc = -1; RelativeIterator(IntervalIterator a, IntervalIterator b) { + super(a); this.a = a; this.b = b; } + @Override + protected void reset() throws IOException { + int doc = a.docID(); + bpos = b.docID() == doc || + (b.docID() < doc && b.advance(doc) == doc); + } + @Override public int start() { return a.start(); @@ -96,23 +103,9 @@ public int end() { return a.end(); } - protected void checkDoc() throws IOException { - if (doc != a.approximation().docID()) { - doc = a.approximation().docID(); - bpos = (b.approximation().docID() == doc || - (b.approximation().docID() < doc && b.approximation().advance(doc) == doc)) && - b.nextInterval() != NO_MORE_INTERVALS; - } - } - @Override - public DocIdSetIterator approximation() { - return a.approximation(); - } - - @Override - public float cost() { - return a.cost() + b.cost(); + public float matchCost() { + return a.matchCost() + b.matchCost(); } } @@ -124,7 +117,6 @@ private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtra @Override public int nextInterval() throws IOException { - checkDoc(); if (bpos == false) return a.nextInterval(); while (a.nextInterval() != NO_MORE_INTERVALS) { @@ -173,15 +165,22 @@ public int hashCode() { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - IntervalIterator notWithin = new FilterIntervalIterator(subtrahend) { + IntervalIterator notWithin = new IntervalIterator(subtrahend) { + + boolean positioned = false; + @Override public 
int start() { + if (positioned == false) + return -1; int start = subtrahend.start(); return Math.max(0, start - positions); } @Override public int end() { + if (positioned == false) + return -1; int end = subtrahend.end(); int newEnd = end + positions; if (newEnd < 0) // check for overflow @@ -189,6 +188,25 @@ public int end() { return newEnd; } + @Override + public int nextInterval() throws IOException { + if (positioned == false) { + positioned = true; + } + return subtrahend.nextInterval(); + } + + @Override + public float matchCost() { + return subtrahend.matchCost(); + } + + @Override + protected void reset() throws IOException { + // already called when the subtrahend approximation is advanced + positioned = false; + } + }; return NON_OVERLAPPING.apply(minuend, notWithin); } @@ -202,7 +220,6 @@ private NotContainingIterator(IntervalIterator minuend, IntervalIterator subtrah @Override public int nextInterval() throws IOException { - checkDoc(); if (bpos == false) return a.nextInterval(); while (a.nextInterval() != NO_MORE_INTERVALS) { @@ -228,7 +245,6 @@ private static class NotContainedByIterator extends RelativeIterator { @Override public int nextInterval() throws IOException { - checkDoc(); if (bpos == false) return a.nextInterval(); while (a.nextInterval() != NO_MORE_INTERVALS) { diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index aba5dff908a6..edca0e3a26a6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -92,12 +92,12 @@ public DisiWrapper(IntervalIterator iterator) { this.scorer = null; this.spans = null; this.intervals = iterator; - this.iterator = iterator.approximation(); - this.cost = iterator.approximation().cost(); + this.iterator = iterator; + this.cost = iterator.cost(); this.doc = -1; this.twoPhaseView = null; - this.approximation = 
iterator.approximation(); - this.matchCost = iterator.cost(); + this.approximation = iterator; + this.matchCost = iterator.matchCost(); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java index b2b5840bb335..2f7b01e88a8c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -75,18 +75,18 @@ public void extractTerms(String field, Set terms) { } } - private static class DisjunctionIntervalIterator implements IntervalIterator { + private static class DisjunctionIntervalIterator extends IntervalIterator { final PriorityQueue intervalQueue; final DisiPriorityQueue disiQueue; - final DisjunctionDISIApproximation approximation; final List iterators; final float matchCost; IntervalIterator current = EMPTY; - int doc = -1; DisjunctionIntervalIterator(List iterators) { + super(buildApproximation(iterators)); + this.disiQueue = ((DisjunctionDISIApproximation)approximation).subIterators; this.iterators = iterators; this.intervalQueue = new PriorityQueue(iterators.size()) { @Override @@ -97,52 +97,52 @@ protected boolean lessThan(IntervalIterator a, IntervalIterator b) { //return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); } }; - this.disiQueue = new DisiPriorityQueue(iterators.size()); float costsum = 0; for (IntervalIterator it : iterators) { - this.disiQueue.add(new DisiWrapper(it)); costsum += it.cost(); } this.matchCost = costsum; - this.approximation = new DisjunctionDISIApproximation(this.disiQueue); } - @Override - public DocIdSetIterator approximation() { - return approximation; + private static DocIdSetIterator buildApproximation(List iterators) { + DisiPriorityQueue disiQueue = new DisiPriorityQueue(iterators.size()); + for (IntervalIterator it : iterators) { + disiQueue.add(new DisiWrapper(it)); + } 
+ return new DisjunctionDISIApproximation(disiQueue); } @Override - public float cost() { + public float matchCost() { return matchCost; } @Override public int start() { - if (doc != approximation.docID()) { - return -1; - } return current.start(); } @Override public int end() { - if (doc != approximation.docID()) { - return -1; - } return current.end(); } + @Override + protected void reset() throws IOException { + intervalQueue.clear(); + for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { + dw.intervals.nextInterval(); + intervalQueue.add(dw.intervals); + } + current = EMPTY; + } + @Override public int nextInterval() throws IOException { - if (doc != approximation.docID()) { - doc = approximation.docID(); - intervalQueue.clear(); - for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { - dw.intervals.nextInterval(); - intervalQueue.add(dw.intervals); + if (current == EMPTY) { + if (intervalQueue.size() > 0) { + current = intervalQueue.top(); } - current = intervalQueue.top(); return current.start(); } int start = current.start(), end = current.end(); @@ -171,11 +171,7 @@ private boolean contains(IntervalIterator it, int start, int end) { } - private static final IntervalIterator EMPTY = new IntervalIterator() { - @Override - public DocIdSetIterator approximation() { - throw new UnsupportedOperationException(); - } + private static final IntervalIterator EMPTY = new IntervalIterator(DocIdSetIterator.empty()) { @Override public int start() { @@ -193,9 +189,14 @@ public int nextInterval() { } @Override - public float cost() { + public float matchCost() { return 0; } + + @Override + protected void reset() throws IOException { + + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java deleted file mode 100644 index cf1843a1e6d0..000000000000 --- a/lucene/core/src/java/org/apache/lucene/search/FilterIntervalIterator.java 
+++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.search; - -import java.io.IOException; - -public abstract class FilterIntervalIterator implements IntervalIterator { - - protected final IntervalIterator in; - - protected FilterIntervalIterator(IntervalIterator in) { - this.in = in; - } - - @Override - public int start() { - return in.start(); - } - - @Override - public int end() { - return in.end(); - } - - @Override - public DocIdSetIterator approximation() { - return in.approximation(); - } - - @Override - public int nextInterval() throws IOException { - return in.nextInterval(); - } - - @Override - public float cost() { - return in.cost(); - } -} diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java index aa074039bdd5..3add87d78243 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java @@ -22,13 +22,36 @@ /** * Wraps an {@link IntervalIterator} and passes through those intervals that match the {@link #accept()} function */ -public abstract class IntervalFilter extends 
FilterIntervalIterator { +public abstract class IntervalFilter extends IntervalIterator { + + private final IntervalIterator in; /** * Create a new filter */ public IntervalFilter(IntervalIterator in) { - super(in); + super(in.approximation); + this.in = in; + } + + @Override + public int start() { + return in.start(); + } + + @Override + public int end() { + return in.end(); + } + + @Override + public float matchCost() { + return in.matchCost(); + } + + @Override + protected void reset() throws IOException { + in.reset(); } /** diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java index 5df8c85031cd..628c54cef4c8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java @@ -51,7 +51,7 @@ public IntervalIterator apply(List iterators) { private static class BlockIntervalIterator extends ConjunctionIntervalIterator { - int doc = -1, start = -1, end = -1; + int start = -1, end = -1; BlockIntervalIterator(List subIterators) { super(subIterators); @@ -59,26 +59,16 @@ private static class BlockIntervalIterator extends ConjunctionIntervalIterator { @Override public int start() { - if (doc != approximation.docID()) { - return -1; - } return start; } @Override public int end() { - if (doc != approximation.docID()) { - return -1; - } return end; } @Override public int nextInterval() throws IOException { - if (doc != approximation.docID()) { - doc = approximation.docID(); - start = end = -1; - } if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) return NO_MORE_INTERVALS; int i = 1; @@ -100,6 +90,11 @@ public int nextInterval() throws IOException { end = subIterators.get(subIterators.size() - 1).end(); return start; } + + @Override + protected void reset() { + start = end = -1; + } } /** @@ -114,7 +109,7 @@ public IntervalIterator apply(List intervalIterators) { private static 
class OrderedIntervalIterator extends ConjunctionIntervalIterator { - int doc = -1, start = -1, end = -1, i; + int start = -1, end = -1, i; private OrderedIntervalIterator(List subIntervals) { super(subIntervals); @@ -122,30 +117,19 @@ private OrderedIntervalIterator(List subIntervals) { @Override public int start() { - if (doc != approximation.docID()) { - return -1; - } return start; } @Override public int end() { - if (doc != approximation.docID()) { - return -1; - } return end; } @Override public int nextInterval() throws IOException { - if (doc != approximation.docID()) { - doc = approximation.docID(); - subIterators.get(0).nextInterval(); - i = 1; - start = end = -1; - } start = end = NO_MORE_INTERVALS; int b = Integer.MAX_VALUE; + i = 1; while (true) { while (true) { if (subIterators.get(i - 1).end() >= b) @@ -167,6 +151,13 @@ public int nextInterval() throws IOException { return start; } } + + @Override + protected void reset() throws IOException { + subIterators.get(0).nextInterval(); + i = 1; + start = end = -1; + } } /** @@ -184,7 +175,7 @@ private static class UnorderedIntervalIterator extends ConjunctionIntervalIterat private final PriorityQueue queue; private final IntervalIterator[] subIterators; - int doc = -1, start = -1, end = -1, queueEnd; + int start = -1, end = -1, queueEnd; UnorderedIntervalIterator(List subIterators) { super(subIterators); @@ -203,17 +194,11 @@ protected boolean lessThan(IntervalIterator a, IntervalIterator b) { @Override public int start() { - if (doc != approximation.docID()) { - return -1; - } return start; } @Override public int end() { - if (doc != approximation.docID()) { - return -1; - } return end; } @@ -226,16 +211,6 @@ void updateRightExtreme(IntervalIterator it) { @Override public int nextInterval() throws IOException { - if (doc != approximation.docID()) { - doc = approximation.docID(); - this.queue.clear(); - this.queueEnd = start = end = -1; - for (IntervalIterator it : subIterators) { - it.nextInterval(); - 
queue.add(it); - updateRightExtreme(it); - } - } while (this.queue.size() == subIterators.length && queue.top().start() == start) { IntervalIterator it = queue.pop(); if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { @@ -259,6 +234,17 @@ public int nextInterval() throws IOException { return start; } + @Override + protected void reset() throws IOException { + queueEnd = start = end = -1; + this.queue.clear(); + for (IntervalIterator it : subIterators) { + it.nextInterval(); + queue.add(it); + updateRightExtreme(it); + } + } + } /** @@ -274,7 +260,6 @@ public IntervalIterator apply(List iterators) { return new ConjunctionIntervalIterator(iterators) { boolean bpos; - int doc = -1; @Override public int start() { @@ -288,10 +273,6 @@ public int end() { @Override public int nextInterval() throws IOException { - if (doc != approximation.docID()) { - doc = approximation.docID(); - bpos = true; - } if (bpos == false) return NO_MORE_INTERVALS; while (a.nextInterval() != NO_MORE_INTERVALS) { @@ -304,6 +285,11 @@ public int nextInterval() throws IOException { } return NO_MORE_INTERVALS; } + + @Override + protected void reset() throws IOException { + bpos = true; + } }; } }; @@ -321,7 +307,6 @@ public IntervalIterator apply(List iterators) { return new ConjunctionIntervalIterator(iterators) { boolean bpos; - int doc = -1; @Override public int start() { @@ -335,10 +320,6 @@ public int end() { @Override public int nextInterval() throws IOException { - if (doc != approximation.docID()) { - doc = approximation.docID(); - bpos = true; - } if (bpos == false) return NO_MORE_INTERVALS; while (a.nextInterval() != NO_MORE_INTERVALS) { @@ -351,6 +332,11 @@ public int nextInterval() throws IOException { } return NO_MORE_INTERVALS; } + + @Override + protected void reset() throws IOException { + bpos = true; + } }; } }; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index 
7dea05990ae3..80451403237c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -23,36 +23,61 @@ * Defines methods to iterate over the intervals that a term, phrase or more * complex positional query matches on a document * - * The iterator is advanced by calling {@link DocIdSetIterator#advance(int)} on the - * DocIdSetIterator returned by {@link #approximation()}. Consumers should then call - * {@link #nextInterval()} to retrieve intervals until {@link #NO_MORE_INTERVALS} is returned. + * The iterator is advanced by calling {@link #advance(int)} or {@link #nextDoc()}. + * Consumers should then call {@link #nextInterval()} to retrieve intervals until + * {@link #NO_MORE_INTERVALS} is returned. */ -public interface IntervalIterator { +public abstract class IntervalIterator extends DocIdSetIterator { + + protected final DocIdSetIterator approximation; + + protected IntervalIterator(DocIdSetIterator approximation) { + this.approximation = approximation; + } /** * When returned from {@link #nextInterval()}, indicates that there are no more * matching intervals on the current document */ - int NO_MORE_INTERVALS = Integer.MAX_VALUE; + public static final int NO_MORE_INTERVALS = Integer.MAX_VALUE; - /** - * An iterator over documents that might have matching intervals - */ - DocIdSetIterator approximation(); + @Override + public final int docID() { + return approximation.docID(); + } + + @Override + public final int nextDoc() throws IOException { + int doc = approximation.nextDoc(); + reset(); + return doc; + } + + @Override + public final int advance(int target) throws IOException { + int doc = approximation.advance(target); + reset(); + return doc; + } + + @Override + public final long cost() { + return approximation.cost(); + } /** * The start of the current interval * * Returns -1 if {@link #nextInterval()} has not yet been called */ - int start(); + public 
abstract int start(); /** * The end of the current interval * * Returns -1 if {@link #nextInterval()} has not yet been called */ - int end(); + public abstract int end(); /** * Advance the iterator to the next interval @@ -60,13 +85,23 @@ public interface IntervalIterator { * @return the starting interval of the next interval, or {@link IntervalIterator#NO_MORE_INTERVALS} if * there are no more intervals on the current document */ - int nextInterval() throws IOException; + public abstract int nextInterval() throws IOException; /** * An indication of the average cost of iterating over all intervals in a document * * @see TwoPhaseIterator#matchCost() */ - float cost(); + public abstract float matchCost(); + + /** + * Called when the underlying iterator has been advanced. + */ + protected abstract void reset() throws IOException; + + @Override + public String toString() { + return approximation.docID() + ":[" + start() + "->" + end() + "]"; + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java index 279265f3dbff..279d3e00885c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java @@ -22,7 +22,6 @@ class IntervalScorer extends Scorer { private final IntervalIterator intervals; - private final DocIdSetIterator approximation; private final LeafSimScorer simScorer; private float freq = -1; @@ -31,13 +30,12 @@ class IntervalScorer extends Scorer { protected IntervalScorer(Weight weight, IntervalIterator intervals, LeafSimScorer simScorer) { super(weight); this.intervals = intervals; - this.approximation = intervals.approximation(); this.simScorer = simScorer; } @Override public int docID() { - return approximation.docID(); + return intervals.docID(); } @Override @@ -78,7 +76,7 @@ public DocIdSetIterator iterator() { @Override public TwoPhaseIterator twoPhaseIterator() { - return 
new TwoPhaseIterator(approximation) { + return new TwoPhaseIterator(intervals) { @Override public boolean matches() throws IOException { return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java index a9eb01e3937c..986b0f7fc479 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -48,37 +48,22 @@ public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IO te.seekExact(term); PostingsEnum pe = te.postings(null, PostingsEnum.POSITIONS); float cost = PhraseQuery.termPositionsCost(te); - return new IntervalIterator() { + return new IntervalIterator(pe) { - int doc = -1, pos = -1, upto; - - @Override - public DocIdSetIterator approximation() { - return pe; - } + int pos = -1, upto; @Override public int start() { - if (doc != pe.docID()) { - return -1; - } return pos; } @Override public int end() { - if (doc != pe.docID()) { - return -1; - } return pos; } @Override public int nextInterval() throws IOException { - if (doc != pe.docID()) { - doc = pe.docID(); - upto = pe.freq(); - } if (upto <= 0) return pos = NO_MORE_INTERVALS; upto--; @@ -86,10 +71,22 @@ public int nextInterval() throws IOException { } @Override - public float cost() { + public float matchCost() { return cost; } + @Override + protected void reset() throws IOException { + if (pe.docID() == NO_MORE_DOCS) { + upto = -1; + pos = NO_MORE_INTERVALS; + } + else { + upto = pe.freq(); + pos = -1; + } + } + @Override public String toString() { return pe.docID() + ":" + pos; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java index ec28cef73b2d..0e7f7ce604c5 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java @@ -97,13 +97,13 @@ private void checkIntervals(IntervalsSource source, String field, int expectedMa for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { ids.advance(doc); int id = (int) ids.longValue(); - if (intervals.approximation().docID() == doc || - (intervals.approximation().docID() < doc && intervals.approximation().advance(doc) == doc)) { + if (intervals.docID() == doc || + (intervals.docID() < doc && intervals.advance(doc) == doc)) { int i = 0, pos; assertEquals(-1, intervals.start()); assertEquals(-1, intervals.end()); while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) { - //System.out.println(doc + ": " + intervals.start() + "->" + intervals.end()); + //System.out.println(doc + ": " + intervals); assertEquals(expected[id][i], pos); assertEquals(expected[id][i], intervals.start()); assertEquals(expected[id][i + 1], intervals.end()); From e6c3ae64f0656447b8d34e01f6c44c4e9d184b91 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 12 Mar 2018 09:09:11 +0000 Subject: [PATCH 72/83] Javadocs --- .../apache/lucene/search/IntervalIterator.java | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index 80451403237c..1064ba062fa9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -20,12 +20,19 @@ import java.io.IOException; /** - * Defines methods to iterate over the intervals that a term, phrase or more - * complex positional query matches on a document + * A {@link DocIdSetIterator} than also allows iteration over matching + * intervals in a document. 
* - * The iterator is advanced by calling {@link #advance(int)} or {@link #nextDoc()}. - * Consumers should then call {@link #nextInterval()} to retrieve intervals until - * {@link #NO_MORE_INTERVALS} is returned. + * Once the iterator is positioned on a document by calling {@link #advance(int)} + * or {@link #nextDoc()}, intervals may be retrieved by calling {@link #nextInterval()} + * until {@link #NO_MORE_INTERVALS} is returned. + * + * The limits of the current interval are returned by {@link #start()} and {@link #end()}. + * When the iterator has been moved to a new document, but before {@link #nextInterval()} + * has been called, both these methods return {@code -1}. + * + * Note that it is possible for a document to return {@link #NO_MORE_INTERVALS} + * on the first call to {@link #nextInterval()} */ public abstract class IntervalIterator extends DocIdSetIterator { From b67a52d6012fb34e5687f8e4bee520a88831b246 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 12 Mar 2018 09:10:40 +0000 Subject: [PATCH 73/83] javadocs --- .../org/apache/lucene/search/DisjunctionIntervalsSource.java | 5 ----- .../src/java/org/apache/lucene/search/IntervalIterator.java | 4 ++-- .../src/java/org/apache/lucene/search/IntervalQuery.java | 3 +++ .../src/java/org/apache/lucene/search/IntervalsSource.java | 3 +++ .../java/org/apache/lucene/search/TermIntervalsSource.java | 2 +- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java index 2f7b01e88a8c..555f95b83a63 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java @@ -160,11 +160,6 @@ public int nextInterval() throws IOException { return current.start(); } - @Override - public String toString() { - return approximation.docID() + ":[" + start() 
+ "->" + end() + "]"; - } - private boolean contains(IntervalIterator it, int start, int end) { return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java index 1064ba062fa9..31dbffe86d46 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java @@ -20,7 +20,7 @@ import java.io.IOException; /** - * A {@link DocIdSetIterator} than also allows iteration over matching + * A {@link DocIdSetIterator} that also allows iteration over matching * intervals in a document. * * Once the iterator is positioned on a document by calling {@link #advance(int)} @@ -89,7 +89,7 @@ public final long cost() { /** * Advance the iterator to the next interval * - * @return the starting interval of the next interval, or {@link IntervalIterator#NO_MORE_INTERVALS} if + * @return the start of the next interval, or {@link IntervalIterator#NO_MORE_INTERVALS} if * there are no more intervals on the current document */ public abstract int nextInterval() throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java index 83a81b062a24..aa843fc06d7c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java @@ -34,6 +34,9 @@ /** * A query that retrieves documents containing intervals returned from an * {@link IntervalsSource} + * + * Static constructor functions for various different sources can be found in the + * {@link Intervals} class */ public final class IntervalQuery extends Query { diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java 
index e1d2fe1598cb..fb923ee5fade 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java @@ -27,6 +27,9 @@ /** * A helper class for {@link IntervalQuery} that provides an {@link IntervalIterator} * for a given field and segment + * + * Static constructor functions for various different sources can be found in the + * {@link Intervals} class */ public abstract class IntervalsSource { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java index 986b0f7fc479..b7d7e10afaf3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java @@ -89,7 +89,7 @@ protected void reset() throws IOException { @Override public String toString() { - return pe.docID() + ":" + pos; + return term.utf8ToString() + ":" + super.toString(); } }; } From d0e3fba5b0773758a05071ecc48574ba23dc24c2 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 14 Mar 2018 14:55:03 +0000 Subject: [PATCH 74/83] Move intervals to sandbox --- .../org/apache/lucene/search/DisiWrapper.java | 18 +- .../ConjunctionIntervalIterator.java | 6 +- .../ConjunctionIntervalsSource.java | 2 +- .../DifferenceIntervalFunction.java | 2 +- .../intervals}/DifferenceIntervalsSource.java | 2 +- .../lucene/intervals/DisiPriorityQueue.java | 171 ++++++++++++++++++ .../apache/lucene/intervals/DisiWrapper.java | 49 +++++ .../DisjunctionDISIApproximation.java | 76 ++++++++ .../DisjunctionIntervalsSource.java | 7 +- .../lucene/intervals}/IntervalFilter.java | 2 +- .../lucene/intervals}/IntervalFunction.java | 47 +++-- .../lucene/intervals}/IntervalIterator.java | 5 +- .../lucene/intervals}/IntervalQuery.java | 16 +- .../lucene/intervals}/IntervalScorer.java | 9 +- .../apache/lucene/intervals}/Intervals.java | 7 +- 
.../lucene/intervals}/IntervalsSource.java | 3 +- .../intervals}/LowpassIntervalsSource.java | 2 +- .../intervals}/TermIntervalsSource.java | 45 ++++- .../lucene/intervals}/TestIntervalQuery.java | 6 +- .../lucene/intervals}/TestIntervals.java | 11 +- 20 files changed, 408 insertions(+), 78 deletions(-) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/ConjunctionIntervalIterator.java (93%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/ConjunctionIntervalsSource.java (98%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/DifferenceIntervalFunction.java (99%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/DifferenceIntervalsSource.java (98%) create mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/DisjunctionIntervalsSource.java (96%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/IntervalFilter.java (98%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/IntervalFunction.java (85%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/IntervalIterator.java (96%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/IntervalQuery.java (90%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/IntervalScorer.java (89%) rename lucene/{core/src/java/org/apache/lucene/search 
=> sandbox/src/java/org/apache/lucene/intervals}/Intervals.java (97%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/IntervalsSource.java (97%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/LowpassIntervalsSource.java (98%) rename lucene/{core/src/java/org/apache/lucene/search => sandbox/src/java/org/apache/lucene/intervals}/TermIntervalsSource.java (60%) rename lucene/{core/src/test/org/apache/lucene/search => sandbox/src/test/org/apache/lucene/intervals}/TestIntervalQuery.java (97%) rename lucene/{core/src/test/org/apache/lucene/search => sandbox/src/test/org/apache/lucene/intervals}/TestIntervals.java (95%) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index edca0e3a26a6..5fa01d1c3e4c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -46,14 +46,9 @@ public class DisiWrapper { public int lastApproxMatchDoc; // last doc of approximation that did match public int lastApproxNonMatchDoc; // last doc of approximation that did not match - // For IntervalIterators - // TODO clean this up! 
- public final IntervalIterator intervals; - public DisiWrapper(Scorer scorer) { this.scorer = scorer; this.spans = null; - this.intervals = null; this.iterator = scorer.iterator(); this.cost = iterator.cost(); this.doc = -1; @@ -71,7 +66,6 @@ public DisiWrapper(Scorer scorer) { public DisiWrapper(Spans spans) { this.scorer = null; this.spans = spans; - this.intervals = null; this.iterator = spans; this.cost = iterator.cost(); this.doc = -1; @@ -88,16 +82,6 @@ public DisiWrapper(Spans spans) { this.lastApproxMatchDoc = -2; } - public DisiWrapper(IntervalIterator iterator) { - this.scorer = null; - this.spans = null; - this.intervals = iterator; - this.iterator = iterator; - this.cost = iterator.cost(); - this.doc = -1; - this.twoPhaseView = null; - this.approximation = iterator; - this.matchCost = iterator.matchCost(); - } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalIterator.java similarity index 93% rename from lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalIterator.java index cfda0de03f50..3fd9daed2de9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalIterator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalIterator.java @@ -15,12 +15,12 @@ * limitations under the License. 
*/ -package org.apache.lucene.search; +package org.apache.lucene.intervals; -import java.io.IOException; -import java.util.ArrayList; import java.util.List; +import org.apache.lucene.search.ConjunctionDISI; + abstract class ConjunctionIntervalIterator extends IntervalIterator { final List subIterators; diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalsSource.java similarity index 98% rename from lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalsSource.java index 2ee0422786c9..30874d96d5e6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalsSource.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; import java.util.ArrayList; diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalFunction.java similarity index 99% rename from lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalFunction.java index bc89bfed70b3..10c697775abf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalFunction.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalFunction.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; import java.util.Objects; diff --git a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalsSource.java similarity index 98% rename from lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalsSource.java index d26217729026..03ebdd5134cb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DifferenceIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalsSource.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; import java.util.Objects; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java new file mode 100644 index 000000000000..4f1e5c9511b1 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.intervals;
+
+
+import java.util.Arrays;
+import java.util.Iterator;
+
+import org.apache.lucene.util.PriorityQueue;
+
+/**
+ * A priority queue of DocIdSetIterators that orders by current doc ID.
+ * This specialization is needed over {@link PriorityQueue} because the
+ * pluggable comparison function makes the rebalancing quite slow.
+ * @lucene.internal
+ */
+public final class DisiPriorityQueue implements Iterable<DisiWrapper> {
+
+  static int leftNode(int node) { // heap index of the left child of node
+    return ((node + 1) << 1) - 1;
+  }
+
+  static int rightNode(int leftNode) { // takes the LEFT child's index, not the parent's
+    return leftNode + 1;
+  }
+
+  static int parentNode(int node) { // heap index of the parent; -1 for the root
+    return ((node + 1) >>> 1) - 1;
+  }
+
+  private final DisiWrapper[] heap; // array-backed min-heap keyed on DisiWrapper.doc
+  private int size; // number of live entries, stored in heap[0..size)
+
+  public DisiPriorityQueue(int maxSize) { // maxSize is a hard capacity: the backing array never grows
+    heap = new DisiWrapper[maxSize];
+    size = 0;
+  }
+
+  public int size() {
+    return size;
+  }
+
+  public DisiWrapper top() { // entry with the smallest doc, or null when the queue is empty
+    return heap[0];
+  }
+
+  /** Get the list of scorers which are on the current doc, linked through {@link DisiWrapper#next}; the queue must not be empty. */
+  public DisiWrapper topList() {
+    final DisiWrapper[] heap = this.heap;
+    final int size = this.size;
+    DisiWrapper list = heap[0];
+    list.next = null;
+    if (size >= 3) {
+      list = topList(list, heap, size, 1);
+      list = topList(list, heap, size, 2);
+    } else if (size == 2 && heap[1].doc == list.doc) {
+      list = prepend(heap[1], list);
+    }
+    return list;
+  }
+
+  // prepend w1 (iterator) to w2 (list)
+  private DisiWrapper prepend(DisiWrapper w1, DisiWrapper w2) {
+    w1.next = w2;
+    return w1;
+  }
+
+  private DisiWrapper topList(DisiWrapper list, DisiWrapper[] heap, // collects the subtree rooted at i while entries share the top doc
+                              int size, int i) {
+    final DisiWrapper w = heap[i];
+    if (w.doc == list.doc) { // a child can only match the top doc if its parent does, so stop descending otherwise
+      list = prepend(w, list);
+      final int left = leftNode(i);
+      final int right = left + 1;
+      if (right < size) {
+        list = topList(list, heap, size, left);
+        list = topList(list, heap, size, right);
+      } else if (left < size && heap[left].doc == list.doc) {
+        list = prepend(heap[left], list);
+      }
+    }
+    return list;
+  }
+
+  public DisiWrapper add(DisiWrapper entry) { // no capacity check: adding beyond maxSize throws ArrayIndexOutOfBoundsException; returns the new top
+    final DisiWrapper[] heap = this.heap;
+    final int size = this.size;
+    heap[size] = entry;
+    upHeap(size);
+    this.size = size + 1;
+    return heap[0];
+  }
+
+  public DisiWrapper pop() { // removes and returns the top; must not be called on an empty queue
+    final DisiWrapper[] heap = this.heap;
+    final DisiWrapper result = heap[0];
+    final int i = --size;
+    heap[0] = heap[i]; // move the last entry to the root, then sift it down
+    heap[i] = null;
+    downHeap(i);
+    return result;
+  }
+
+  public DisiWrapper updateTop() { // restores heap order after the caller changed top().doc; returns the new top
+    downHeap(size);
+    return heap[0];
+  }
+
+  DisiWrapper updateTop(DisiWrapper topReplacement) { // replaces the top entry, then restores heap order
+    heap[0] = topReplacement;
+    return updateTop();
+  }
+
+  void upHeap(int i) { // sifts heap[i] towards the root while its doc is smaller than its parent's
+    final DisiWrapper node = heap[i];
+    final int nodeDoc = node.doc;
+    int j = parentNode(i);
+    while (j >= 0 && nodeDoc < heap[j].doc) {
+      heap[i] = heap[j];
+      i = j;
+      j = parentNode(j);
+    }
+    heap[i] = node;
+  }
+
+  void downHeap(int size) { // sifts heap[0] down, treating heap[0..size) as the live heap
+    int i = 0;
+    final DisiWrapper node = heap[0];
+    int j = leftNode(i);
+    if (j < size) {
+      int k = rightNode(j);
+      if (k < size && heap[k].doc < heap[j].doc) {
+        j = k; // always compare against the smaller of the two children
+      }
+      if (heap[j].doc < node.doc) {
+        do {
+          heap[i] = heap[j];
+          i = j;
+          j = leftNode(i);
+          k = rightNode(j);
+          if (k < size && heap[k].doc < heap[j].doc) {
+            j = k;
+          }
+        } while (j < size && heap[j].doc < node.doc);
+        heap[i] = node;
+      }
+    }
+  }
+
+  @Override
+  public Iterator<DisiWrapper> iterator() { // iterates the live entries in heap-array order, not sorted by doc
+    return Arrays.asList(heap).subList(0, size).iterator();
+  }
+
+}
+
+
diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java
new file mode 100644
index 000000000000..78c2cc215546
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.intervals;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.TwoPhaseIterator;
+/** Mutable holder pairing an {@link IntervalIterator} with its cached cost and current doc, for use in {@link DisiPriorityQueue}. */
+public class DisiWrapper {
+
+  public final DocIdSetIterator iterator; // the wrapped iterator; same object as #intervals
+  public final IntervalIterator intervals; // the wrapped iterator, typed for interval access
+  public final long cost; // iterator.cost(), captured at construction
+  public final float matchCost; // iterator.matchCost(), captured at construction
+  public int doc; // the current doc, used for comparison
+  public DisiWrapper next; // reference to a next element, see DisiPriorityQueue#topList
+
+  // An approximation of the iterator, or the iterator itself if it does not
+  // support two-phase iteration (this constructor always stores the iterator itself)
+  public final DocIdSetIterator approximation;
+  // A two-phase view of the iterator, or null if the iterator does not support
+  // two-phase iteration (this constructor always stores null)
+  public final TwoPhaseIterator twoPhaseView;
+
+  public DisiWrapper(IntervalIterator iterator) {
+    this.intervals = iterator;
+    this.iterator = iterator;
+    this.cost = iterator.cost();
+    this.doc = -1; // presumably "not positioned yet", as for an unadvanced DocIdSetIterator - confirm
+    this.twoPhaseView = null;
+    this.approximation = iterator;
+    this.matchCost = iterator.matchCost();
+  }
+
+}
diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java
new file mode 100644
index 000000000000..30ab9d4bc676
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.intervals;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+
+/**
+ * A {@link DocIdSetIterator} which is a disjunction of the approximations of
+ * the provided iterators.
+ * @lucene.internal
+ */
+class DisjunctionDISIApproximation extends DocIdSetIterator {
+
+  final DisiPriorityQueue subIterators; // queue of sub-iterators, kept ordered by their current doc
+  final long cost; // sum of the sub-iterators' costs, computed once in the constructor
+
+  public DisjunctionDISIApproximation(DisiPriorityQueue subIterators) {
+    this.subIterators = subIterators;
+    long cost = 0;
+    for (DisiWrapper w : subIterators) {
+      cost += w.cost;
+    }
+    this.cost = cost;
+  }
+
+  @Override
+  public long cost() {
+    return cost;
+  }
+
+  @Override
+  public int docID() {
+    return subIterators.top().doc; // the queue top holds the smallest current doc
+  }
+
+  @Override
+  public int nextDoc() throws IOException {
+    DisiWrapper top = subIterators.top();
+    final int doc = top.doc; // the doc this disjunction is currently positioned on
+    do {
+      top.doc = top.approximation.nextDoc();
+      top = subIterators.updateTop(); // re-sift so the next-smallest sub-iterator becomes the top
+    } while (top.doc == doc); // repeat for every sub-iterator that was still on the old doc
+
+    return top.doc;
+  }
+
+  @Override
+  public int advance(int target) throws IOException {
+    DisiWrapper top = subIterators.top();
+    do {
+      top.doc = top.approximation.advance(target);
+      top = subIterators.updateTop(); // re-sift so the most-lagging sub-iterator becomes the top
+    } while (top.doc < target); // stop once even the smallest sub-iterator is at or beyond target
+
+    return top.doc;
+  }
+}
+
+
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionIntervalsSource.java
similarity index 96%
rename from lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java
rename to
lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionIntervalsSource.java index 555f95b83a63..f1b2381b3694 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionIntervalsSource.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; import java.util.ArrayList; @@ -26,6 +26,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.PriorityQueue; class DisjunctionIntervalsSource extends IntervalsSource { @@ -148,13 +149,13 @@ public int nextInterval() throws IOException { int start = current.start(), end = current.end(); while (intervalQueue.size() > 0 && contains(intervalQueue.top(), start, end)) { IntervalIterator it = intervalQueue.pop(); - if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { + if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { intervalQueue.add(it); } } if (intervalQueue.size() == 0) { current = EMPTY; - return IntervalIterator.NO_MORE_INTERVALS; + return NO_MORE_INTERVALS; } current = intervalQueue.top(); return current.start(); diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFilter.java similarity index 98% rename from lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFilter.java index 3add87d78243..47fea70f312f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFilter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFilter.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFunction.java similarity index 85% rename from lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFunction.java index 628c54cef4c8..2299f152dabe 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalFunction.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFunction.java @@ -15,11 +15,10 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; import java.util.List; -import java.util.Objects; import org.apache.lucene.util.PriorityQueue; @@ -69,20 +68,20 @@ public int end() { @Override public int nextInterval() throws IOException { - if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) - return NO_MORE_INTERVALS; + if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) + return IntervalIterator.NO_MORE_INTERVALS; int i = 1; while (i < subIterators.size()) { while (subIterators.get(i).start() <= subIterators.get(i - 1).end()) { - if (subIterators.get(i).nextInterval() == NO_MORE_INTERVALS) - return NO_MORE_INTERVALS; + if (subIterators.get(i).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) + return IntervalIterator.NO_MORE_INTERVALS; } if (subIterators.get(i).start() == subIterators.get(i - 1).end() + 1) { i = i + 1; } else { - if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) - return NO_MORE_INTERVALS; + if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) + return IntervalIterator.NO_MORE_INTERVALS; i = 1; } } @@ -127,7 +126,7 @@ public int end() { @Override public int nextInterval() throws IOException { - start = end = NO_MORE_INTERVALS; + start = 
end = IntervalIterator.NO_MORE_INTERVALS; int b = Integer.MAX_VALUE; i = 1; while (true) { @@ -137,7 +136,7 @@ public int nextInterval() throws IOException { if (i == subIterators.size() || subIterators.get(i).start() > subIterators.get(i - 1).end()) break; do { - if (subIterators.get(i).end() >= b || subIterators.get(i).nextInterval() == NO_MORE_INTERVALS) + if (subIterators.get(i).end() >= b || subIterators.get(i).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) return start; } while (subIterators.get(i).start() <= subIterators.get(i - 1).end()); @@ -147,7 +146,7 @@ public int nextInterval() throws IOException { end = subIterators.get(subIterators.size() - 1).end(); b = subIterators.get(subIterators.size() - 1).start(); i = 1; - if (subIterators.get(0).nextInterval() == NO_MORE_INTERVALS) + if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) return start; } } @@ -213,20 +212,20 @@ void updateRightExtreme(IntervalIterator it) { public int nextInterval() throws IOException { while (this.queue.size() == subIterators.length && queue.top().start() == start) { IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { + if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { queue.add(it); updateRightExtreme(it); } } if (this.queue.size() < subIterators.length) - return NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; do { start = queue.top().start(); end = queueEnd; if (queue.top().end() == end) return start; IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { + if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { queue.add(it); updateRightExtreme(it); } @@ -274,16 +273,16 @@ public int end() { @Override public int nextInterval() throws IOException { if (bpos == false) - return NO_MORE_INTERVALS; - while (a.nextInterval() != NO_MORE_INTERVALS) { + return IntervalIterator.NO_MORE_INTERVALS; + while 
(a.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { while (b.start() < a.start() && b.end() < a.end()) { - if (b.nextInterval() == NO_MORE_INTERVALS) - return NO_MORE_INTERVALS; + if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) + return IntervalIterator.NO_MORE_INTERVALS; } if (a.start() <= b.start() && a.end() >= b.end()) return a.start(); } - return NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; } @Override @@ -321,16 +320,16 @@ public int end() { @Override public int nextInterval() throws IOException { if (bpos == false) - return NO_MORE_INTERVALS; - while (a.nextInterval() != NO_MORE_INTERVALS) { + return IntervalIterator.NO_MORE_INTERVALS; + while (a.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { while (b.end() < a.end()) { - if (b.nextInterval() == NO_MORE_INTERVALS) - return NO_MORE_INTERVALS; + if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) + return IntervalIterator.NO_MORE_INTERVALS; } if (b.start() <= a.start()) return a.start(); } - return NO_MORE_INTERVALS; + return IntervalIterator.NO_MORE_INTERVALS; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalIterator.java similarity index 96% rename from lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalIterator.java index 31dbffe86d46..f6a5f89efc12 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalIterator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalIterator.java @@ -15,10 +15,13 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.TwoPhaseIterator; + /** * A {@link DocIdSetIterator} that also allows iteration over matching * intervals in a document. 
diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalQuery.java similarity index 90% rename from lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalQuery.java index aa843fc06d7c..4d2c22f97a35 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalQuery.java @@ -15,20 +15,26 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; -import java.util.List; import java.util.Objects; import java.util.Set; -import java.util.stream.Collectors; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermStates; +import org.apache.lucene.search.CollectionStatistics; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LeafSimScorer; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TermStatistics; +import org.apache.lucene.search.Weight; import org.apache.lucene.search.similarities.Similarity; /** @@ -74,7 +80,7 @@ private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, float boost) TermStatistics[] termStats = new TermStatistics[terms.size()]; int termUpTo = 0; for (Term term : terms) { - TermStatistics termStatistics = searcher.termStatistics(term, TermStates.build(searcher.readerContext, term, true)); + TermStatistics termStatistics = searcher.termStatistics(term, TermStates.build(searcher.getTopReaderContext(), term, true)); if (termStatistics != null) { termStats[termUpTo++] = termStatistics; } diff 
--git a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalScorer.java similarity index 89% rename from lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalScorer.java index 279d3e00885c..a28eddcf16fd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalScorer.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalScorer.java @@ -15,10 +15,17 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.LeafSimScorer; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; + class IntervalScorer extends Scorer { private final IntervalIterator intervals; diff --git a/lucene/core/src/java/org/apache/lucene/search/Intervals.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/Intervals.java similarity index 97% rename from lucene/core/src/java/org/apache/lucene/search/Intervals.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/Intervals.java index aa29fa2f4b99..7b95e4cf0f7b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Intervals.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/Intervals.java @@ -15,15 +15,10 @@ * limitations under the License. 
*/ -package org.apache.lucene.search; +package org.apache.lucene.intervals; -import java.io.IOException; import java.util.Arrays; -import java.util.Objects; -import java.util.Set; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; import org.apache.lucene.util.BytesRef; /** diff --git a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalsSource.java similarity index 97% rename from lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalsSource.java index fb923ee5fade..405423bec334 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalsSource.java @@ -15,10 +15,9 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; -import java.util.Objects; import java.util.Set; import org.apache.lucene.index.LeafReaderContext; diff --git a/lucene/core/src/java/org/apache/lucene/search/LowpassIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/LowpassIntervalsSource.java similarity index 98% rename from lucene/core/src/java/org/apache/lucene/search/LowpassIntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/LowpassIntervalsSource.java index 39f24fbfb670..82f4acf785be 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LowpassIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/LowpassIntervalsSource.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; import java.util.Objects; diff --git a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/TermIntervalsSource.java similarity index 60% rename from lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/intervals/TermIntervalsSource.java index b7d7e10afaf3..00ed08984720 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/TermIntervalsSource.java @@ -15,17 +15,23 @@ * limitations under the License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; import java.util.Objects; import java.util.Set; +import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat; +import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.util.BytesRef; class TermIntervalsSource extends IntervalsSource { @@ -47,7 +53,7 @@ public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IO TermsEnum te = terms.iterator(); te.seekExact(term); PostingsEnum pe = te.postings(null, PostingsEnum.POSITIONS); - float cost = PhraseQuery.termPositionsCost(te); + float cost = termPositionsCost(te); return new IntervalIterator(pe) { int pos = -1, upto; @@ -116,4 +122,39 @@ public String toString() { public void extractTerms(String field, Set terms) { terms.add(new Term(field, 
term));
   }
+
+  /** A guess of
+   * the average number of simple operations for the initial seek and buffer refill
+   * per document for the positions of a term.
+   * See also {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}.
+   * <p>
+   * Aside: Instead of being constant this could depend among others on
+   * {@link Lucene50PostingsFormat#BLOCK_SIZE},
+   * {@link TermsEnum#docFreq()},
+   * {@link TermsEnum#totalTermFreq()},
+   * {@link DocIdSetIterator#cost()} (expected number of matching docs),
+   * {@link LeafReader#maxDoc()} (total number of docs in the segment),
+   * and the seek time and block size of the device storing the index.
+   */
+  private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
+
+  /** Number of simple operations in {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}
+   *  when no seek or buffer refill is done.
+   */
+  private static final int TERM_OPS_PER_POS = 7;
+
+  /** Returns an expected cost in simple operations
+   * of processing the occurrences of a term
+   * in a document that contains the term.
+   * This is for use by {@link TwoPhaseIterator#matchCost} implementations.
+   * @param termsEnum The term is the term at which this TermsEnum is positioned.
+   */
+  static float termPositionsCost(TermsEnum termsEnum) throws IOException {
+    // TODO: When intervals move to core, refactor to use the copy of this in PhraseQuery
+    int docFreq = termsEnum.docFreq();
+    assert docFreq > 0; // presumably guaranteed because the enum is positioned on an existing term - confirm
+    long totalTermFreq = termsEnum.totalTermFreq();
+    float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq; // average occurrences per document containing the term
+    return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS; // fixed per-doc seek cost plus per-occurrence cost
+  }
 }
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java b/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervalQuery.java
similarity index 97%
rename from lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java
rename to lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervalQuery.java
index 6c1ba8ff6c1b..489603f67fc0 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestIntervalQuery.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervalQuery.java
@@ -15,7 +15,7 @@
  * limitations under the
License. */ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; @@ -24,7 +24,9 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java b/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervals.java similarity index 95% rename from lucene/core/src/test/org/apache/lucene/search/TestIntervals.java rename to lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervals.java index 0e7f7ce604c5..182299ae9443 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIntervals.java +++ b/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervals.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.search; +package org.apache.lucene.intervals; import java.io.IOException; @@ -31,10 +31,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.spans.SpanNearQuery; -import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.Directory; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; @@ -88,7 +85,7 @@ public static void teardownIndex() throws IOException { private void checkIntervals(IntervalsSource source, String field, int expectedMatchCount, int[][] expected) throws IOException { int matchedDocs = 0; - for (LeafReaderContext ctx : searcher.leafContexts) { + for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) { assertNull(source.intervals(field + "fake", ctx)); NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); IntervalIterator intervals = source.intervals(field, ctx); @@ -123,7 +120,7 @@ private void checkIntervals(IntervalsSource source, String field, int expectedMa public void testIntervalsOnFieldWithNoPositions() throws IOException { IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> { - Intervals.term("wibble").intervals("id", searcher.leafContexts.get(0)); + Intervals.term("wibble").intervals("id", searcher.getIndexReader().leaves().get(0)); }); assertEquals("Cannot create an IntervalIterator over field id because it has no indexed positions", e.getMessage()); } From eb14d9b5056b4353621991b3b5f7f418563c3522 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 15 Mar 2018 11:25:52 +0000 Subject: [PATCH 75/83] cleanup --- .../core/src/java/org/apache/lucene/search/ConjunctionDISI.java | 2 +- 
lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java index 8ed42316a09f..780e854033a8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java @@ -58,7 +58,7 @@ public static DocIdSetIterator intersectScorers(Collection scorers) { * returned {@link DocIdSetIterator} might leverage two-phase iteration in * which case it is possible to retrieve the {@link TwoPhaseIterator} using * {@link TwoPhaseIterator#unwrap}. */ - public static DocIdSetIterator intersectIterators(List iterators) { + public static DocIdSetIterator intersectIterators(List iterators) { if (iterators.size() < 2) { throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index 5fa01d1c3e4c..fac9418010f4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -81,7 +81,5 @@ public DisiWrapper(Spans spans) { this.lastApproxNonMatchDoc = -2; this.lastApproxMatchDoc = -2; } - - } From e9b30c23e376984646ab26a7de5f9974677d0ae5 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 15 Mar 2018 12:26:14 +0000 Subject: [PATCH 76/83] Add package info --- .../apache/lucene/intervals/package-info.java | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/package-info.java diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/package-info.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/package-info.java new file mode 100644 index 
000000000000..b94a24cd4f6a --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/intervals/package-info.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.intervals; + +/** + *

    Intervals queries

    + * + * This package contains experimental classes to search over intervals within fields + * + *

    IntervalsSource

    + * + * The {@link org.apache.lucene.intervals.IntervalsSource} class can be used to construct proximity + * relationships between terms and intervals. They can be built using static methods + * in the {@link org.apache.lucene.intervals.Intervals} class + * + *

    Basic intervals

    + * + *
      + *
    • {@link org.apache.lucene.intervals.Intervals#term(String)} — Represents a single term
    • + *
    • {@link org.apache.lucene.intervals.Intervals#phrase(java.lang.String...)} — Represents a phrase
    • + *
    • {@link org.apache.lucene.intervals.Intervals#ordered(org.apache.lucene.intervals.IntervalsSource...)} + * — Represents an interval over an ordered set of terms or intervals
    • + *
    • {@link org.apache.lucene.intervals.Intervals#unordered(org.apache.lucene.intervals.IntervalsSource...)} + * — Represents an interval over an unordered set of terms or intervals
    • + *
    • {@link org.apache.lucene.intervals.Intervals#or(org.apache.lucene.intervals.IntervalsSource...)} + * — Represents the disjunction of a set of terms or intervals
    • + *
    + * + *

    Filters

    + * + *
      + *
    • {@link org.apache.lucene.intervals.Intervals#maxwidth(int, org.apache.lucene.intervals.IntervalsSource)} + * — Filters out intervals that are larger than a set width
    • + *
    • {@link org.apache.lucene.intervals.Intervals#containedBy(org.apache.lucene.intervals.IntervalsSource, org.apache.lucene.intervals.IntervalsSource)} + * — Returns intervals that are contained by another interval
    • + *
    • {@link org.apache.lucene.intervals.Intervals#notContainedBy(org.apache.lucene.intervals.IntervalsSource, org.apache.lucene.intervals.IntervalsSource)} + * — Returns intervals that are *not* contained by another interval
    • + *
    • {@link org.apache.lucene.intervals.Intervals#containing(org.apache.lucene.intervals.IntervalsSource, org.apache.lucene.intervals.IntervalsSource)} + * — Returns intervals that contain another interval
    • + *
    • {@link org.apache.lucene.intervals.Intervals#notContaining(org.apache.lucene.intervals.IntervalsSource, org.apache.lucene.intervals.IntervalsSource)} + * — Returns intervals that do not contain another interval
    • + *
    • {@link org.apache.lucene.intervals.Intervals#nonOverlapping(org.apache.lucene.intervals.IntervalsSource, org.apache.lucene.intervals.IntervalsSource)} + * — Returns intervals that do not overlap with another interval
    • + *
    • {@link org.apache.lucene.intervals.Intervals#notWithin(org.apache.lucene.intervals.IntervalsSource, int, org.apache.lucene.intervals.IntervalsSource)} + * — Returns intervals that do not appear within a set number of positions of another interval
    • + *
    + * + *

    IntervalQuery

    + * + * An {@link org.apache.lucene.intervals.IntervalQuery} takes a field name and an {@link org.apache.lucene.intervals.IntervalsSource}, + * and matches all documents that contain intervals defined by the source in that field. + */ \ No newline at end of file From 412cc2bb13a112160e81ae4a0c5bd5a52ce9f4fd Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 16 Mar 2018 16:57:10 +0000 Subject: [PATCH 77/83] Move package to org.apache.lucene.search.intervals --- .../apache/lucene/intervals/package-info.java | 67 ------------------- .../ConjunctionIntervalIterator.java | 2 +- .../intervals/ConjunctionIntervalsSource.java | 2 +- .../intervals/DifferenceIntervalFunction.java | 2 +- .../intervals/DifferenceIntervalsSource.java | 2 +- .../intervals/DisiPriorityQueue.java | 2 +- .../{ => search}/intervals/DisiWrapper.java | 2 +- .../DisjunctionDISIApproximation.java | 2 +- .../intervals/DisjunctionIntervalsSource.java | 2 +- .../intervals/IntervalFilter.java | 2 +- .../intervals/IntervalFunction.java | 2 +- .../intervals/IntervalIterator.java | 2 +- .../{ => search}/intervals/IntervalQuery.java | 2 +- .../intervals/IntervalScorer.java | 2 +- .../{ => search}/intervals/Intervals.java | 2 +- .../intervals/IntervalsSource.java | 2 +- .../intervals/LowpassIntervalsSource.java | 2 +- .../intervals/TermIntervalsSource.java | 2 +- .../lucene/search/intervals/package-info.java | 67 +++++++++++++++++++ .../intervals/TestIntervalQuery.java | 2 +- .../{ => search}/intervals/TestIntervals.java | 2 +- 21 files changed, 86 insertions(+), 86 deletions(-) delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/package-info.java rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/ConjunctionIntervalIterator.java (96%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/ConjunctionIntervalsSource.java (98%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/DifferenceIntervalFunction.java (99%) rename 
lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/DifferenceIntervalsSource.java (98%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/DisiPriorityQueue.java (99%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/DisiWrapper.java (97%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/DisjunctionDISIApproximation.java (97%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/DisjunctionIntervalsSource.java (99%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/IntervalFilter.java (97%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/IntervalFunction.java (99%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/IntervalIterator.java (98%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/IntervalQuery.java (99%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/IntervalScorer.java (98%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/Intervals.java (99%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/IntervalsSource.java (97%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/LowpassIntervalsSource.java (97%) rename lucene/sandbox/src/java/org/apache/lucene/{ => search}/intervals/TermIntervalsSource.java (99%) create mode 100644 lucene/sandbox/src/java/org/apache/lucene/search/intervals/package-info.java rename lucene/sandbox/src/test/org/apache/lucene/{ => search}/intervals/TestIntervalQuery.java (99%) rename lucene/sandbox/src/test/org/apache/lucene/{ => search}/intervals/TestIntervals.java (99%) diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/package-info.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/package-info.java deleted file mode 100644 index b94a24cd4f6a..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/package-info.java 
+++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.intervals; - -/** - *

    Intervals queries

    - * - * This package contains experimental classes to search over intervals within fields - * - *

    IntervalsSource

    - * - * The {@link org.apache.lucene.intervals.IntervalsSource} class can be used to construct proximity - * relationships between terms and intervals. They can be built using static methods - * in the {@link org.apache.lucene.intervals.Intervals} class - * - *

    Basic intervals

    - * - *
      - *
    • {@link org.apache.lucene.intervals.Intervals#term(String)} — Represents a single term
    • - *
    • {@link org.apache.lucene.intervals.Intervals#phrase(java.lang.String...)} — Represents a phrase
    • - *
    • {@link org.apache.lucene.intervals.Intervals#ordered(org.apache.lucene.intervals.IntervalsSource...)} - * — Represents an interval over an ordered set of terms or intervals
    • - *
    • {@link org.apache.lucene.intervals.Intervals#unordered(org.apache.lucene.intervals.IntervalsSource...)} - * — Represents an interval over an unordered set of terms or intervals
    • - *
    • {@link org.apache.lucene.intervals.Intervals#or(org.apache.lucene.intervals.IntervalsSource...)} - * — Represents the disjunction of a set of terms or intervals
    • - *
    - * - *

    Filters

    - * - *
      - *
    • {@link org.apache.lucene.intervals.Intervals#maxwidth(int, org.apache.lucene.intervals.IntervalsSource)} - * — Filters out intervals that are larger than a set width
    • - *
    • {@link org.apache.lucene.intervals.Intervals#containedBy(org.apache.lucene.intervals.IntervalsSource, org.apache.lucene.intervals.IntervalsSource)} - * — Returns intervals that are contained by another interval
    • - *
    • {@link org.apache.lucene.intervals.Intervals#notContainedBy(org.apache.lucene.intervals.IntervalsSource, org.apache.lucene.intervals.IntervalsSource)} - * — Returns intervals that are *not* contained by another interval
    • - *
    • {@link org.apache.lucene.intervals.Intervals#containing(org.apache.lucene.intervals.IntervalsSource, org.apache.lucene.intervals.IntervalsSource)} - * — Returns intervals that contain another interval
    • - *
    • {@link org.apache.lucene.intervals.Intervals#notContaining(org.apache.lucene.intervals.IntervalsSource, org.apache.lucene.intervals.IntervalsSource)} - * — Returns intervals that do not contain another interval
    • - *
    • {@link org.apache.lucene.intervals.Intervals#nonOverlapping(org.apache.lucene.intervals.IntervalsSource, org.apache.lucene.intervals.IntervalsSource)} - * — Returns intervals that do not overlap with another interval
    • - *
    • {@link org.apache.lucene.intervals.Intervals#notWithin(org.apache.lucene.intervals.IntervalsSource, int, org.apache.lucene.intervals.IntervalsSource)} - * — Returns intervals that do not appear within a set number of positions of another interval
    • - *
    - * - *

    IntervalQuery

    - * - * An {@link org.apache.lucene.intervals.IntervalQuery} takes a field name and an {@link org.apache.lucene.intervals.IntervalsSource}, - * and matches all documents that contain intervals defined by the source in that field. - */ \ No newline at end of file diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalIterator.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalIterator.java similarity index 96% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalIterator.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalIterator.java index 3fd9daed2de9..29d617cfd590 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalIterator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalIterator.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.util.List; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalsSource.java similarity index 98% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalsSource.java index 30874d96d5e6..d2805c978901 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalsSource.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; import java.util.ArrayList; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalFunction.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java similarity index 99% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalFunction.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java index 10c697775abf..934869490006 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalFunction.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; import java.util.Objects; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalsSource.java similarity index 98% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalsSource.java index 03ebdd5134cb..316b6ff14c4b 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalsSource.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; import java.util.Objects; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiPriorityQueue.java similarity index 99% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiPriorityQueue.java index 4f1e5c9511b1..448b7192b78a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiPriorityQueue.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.util.Arrays; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiWrapper.java similarity index 97% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiWrapper.java index 78c2cc215546..d1177ac186ec 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiWrapper.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.TwoPhaseIterator; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionDISIApproximation.java similarity index 97% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionDISIApproximation.java index 30ab9d4bc676..d4e8e79a1801 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionDISIApproximation.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java similarity index 99% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionIntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java index f1b2381b3694..1a5d3063f105 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; import java.util.ArrayList; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFilter.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFilter.java similarity index 97% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFilter.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFilter.java index 47fea70f312f..10fcc03430fe 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFilter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFilter.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFunction.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java similarity index 99% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFunction.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java index 2299f152dabe..c550a722517b 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFunction.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; import java.util.List; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalIterator.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java similarity index 98% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalIterator.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java index f6a5f89efc12..f31b2d7abb2c 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalIterator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalQuery.java similarity index 99% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalQuery.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalQuery.java index 4d2c22f97a35..934d553717b1 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalQuery.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; import java.util.Arrays; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalScorer.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalScorer.java similarity index 98% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalScorer.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalScorer.java index a28eddcf16fd..7ff04c8c46dc 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalScorer.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalScorer.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/Intervals.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java similarity index 99% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/Intervals.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java index 7b95e4cf0f7b..b36091929630 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/Intervals.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.util.Arrays; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalsSource.java similarity index 97% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalsSource.java index 405423bec334..9791ff87b6f7 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalsSource.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; import java.util.Set; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/LowpassIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/LowpassIntervalsSource.java similarity index 97% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/LowpassIntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/LowpassIntervalsSource.java index 82f4acf785be..43fdf7596c02 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/LowpassIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/LowpassIntervalsSource.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; import java.util.Objects; diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/TermIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java similarity index 99% rename from lucene/sandbox/src/java/org/apache/lucene/intervals/TermIntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java index 00ed08984720..a1b94607f0b9 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/TermIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; import java.util.Objects; diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/package-info.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/package-info.java new file mode 100644 index 000000000000..a2d78f20b59f --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/package-info.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search.intervals; + +/** + *

    Intervals queries

    + * + * This package contains experimental classes to search over intervals within fields. + * + *

    IntervalsSource

    + * + * The {@link org.apache.lucene.search.intervals.IntervalsSource} class can be used to construct proximity + * relationships between terms and intervals. Sources can be built using static methods + * in the {@link org.apache.lucene.search.intervals.Intervals} class. + * + *

    Basic intervals

    + * + *
      + *
    • {@link org.apache.lucene.search.intervals.Intervals#term(String)} — Represents a single term
    • + *
    • {@link org.apache.lucene.search.intervals.Intervals#phrase(java.lang.String...)} — Represents a phrase
    • + *
    • {@link org.apache.lucene.search.intervals.Intervals#ordered(org.apache.lucene.search.intervals.IntervalsSource...)} + * — Represents an interval over an ordered set of terms or intervals
    • + *
    • {@link org.apache.lucene.search.intervals.Intervals#unordered(org.apache.lucene.search.intervals.IntervalsSource...)} + * — Represents an interval over an unordered set of terms or intervals
    • + *
    • {@link org.apache.lucene.search.intervals.Intervals#or(org.apache.lucene.search.intervals.IntervalsSource...)} + * — Represents the disjunction of a set of terms or intervals
    • + *
    + * + *

    Filters

    + * + *
      + *
    • {@link org.apache.lucene.search.intervals.Intervals#maxwidth(int, org.apache.lucene.search.intervals.IntervalsSource)} + * — Filters out intervals that are larger than a set width
    • + *
    • {@link org.apache.lucene.search.intervals.Intervals#containedBy(org.apache.lucene.search.intervals.IntervalsSource, org.apache.lucene.search.intervals.IntervalsSource)} + * — Returns intervals that are contained by another interval
    • + *
    • {@link org.apache.lucene.search.intervals.Intervals#notContainedBy(org.apache.lucene.search.intervals.IntervalsSource, org.apache.lucene.search.intervals.IntervalsSource)} + * — Returns intervals that are not contained by another interval
    • + *
    • {@link org.apache.lucene.search.intervals.Intervals#containing(org.apache.lucene.search.intervals.IntervalsSource, org.apache.lucene.search.intervals.IntervalsSource)} + * — Returns intervals that contain another interval
    • + *
    • {@link org.apache.lucene.search.intervals.Intervals#notContaining(org.apache.lucene.search.intervals.IntervalsSource, org.apache.lucene.search.intervals.IntervalsSource)} + * — Returns intervals that do not contain another interval
    • + *
    • {@link org.apache.lucene.search.intervals.Intervals#nonOverlapping(org.apache.lucene.search.intervals.IntervalsSource, org.apache.lucene.search.intervals.IntervalsSource)} + * — Returns intervals that do not overlap with another interval
    • + *
    • {@link org.apache.lucene.search.intervals.Intervals#notWithin(org.apache.lucene.search.intervals.IntervalsSource, int, org.apache.lucene.search.intervals.IntervalsSource)} + * — Returns intervals that do not appear within a set number of positions of another interval
    • + *
    + * + *

    IntervalQuery

    + * + * An {@link org.apache.lucene.search.intervals.IntervalQuery} takes a field name and an {@link org.apache.lucene.search.intervals.IntervalsSource}, + * and matches all documents that contain intervals defined by the source in that field. + */ \ No newline at end of file diff --git a/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervalQuery.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java similarity index 99% rename from lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervalQuery.java rename to lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java index 489603f67fc0..c20fafac0028 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervalQuery.java +++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; diff --git a/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervals.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java similarity index 99% rename from lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervals.java rename to lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java index 182299ae9443..8f91a7f9788b 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervals.java +++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.lucene.intervals; +package org.apache.lucene.search.intervals; import java.io.IOException; From 636b337d5ce1fbf441fa2471a9d798c8260c93ee Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 16 Mar 2018 16:58:37 +0000 Subject: [PATCH 78/83] Make some impl classes package-private --- .../org/apache/lucene/search/intervals/DisiPriorityQueue.java | 2 +- .../java/org/apache/lucene/search/intervals/DisiWrapper.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiPriorityQueue.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiPriorityQueue.java index 448b7192b78a..0be96ab9d461 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiPriorityQueue.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiPriorityQueue.java @@ -28,7 +28,7 @@ * pluggable comparison function makes the rebalancing quite slow. * @lucene.internal */ -public final class DisiPriorityQueue implements Iterable { +final class DisiPriorityQueue implements Iterable { static int leftNode(int node) { return ((node + 1) << 1) - 1; diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiWrapper.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiWrapper.java index d1177ac186ec..0dc61d481fb8 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiWrapper.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiWrapper.java @@ -20,7 +20,7 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.TwoPhaseIterator; -public class DisiWrapper { +class DisiWrapper { public final DocIdSetIterator iterator; public final IntervalIterator intervals; From 5701af9db686d0576fbdb3fe00d92fce44a789e0 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 16 Mar 2018 17:50:23 +0000 Subject: [PATCH 79/83] Slim down IntervalIterator, move approximations & 
implementations into descendant classes --- .../search/intervals/ConjunctionDISI.java | 307 ++++++++++++++++++ .../ConjunctionIntervalIterator.java | 33 +- .../intervals/DifferenceIntervalFunction.java | 54 ++- .../intervals/DisjunctionIntervalsSource.java | 69 +++- .../search/intervals/IntervalFilter.java | 26 +- .../search/intervals/IntervalIterator.java | 40 --- .../intervals/LowpassIntervalsSource.java | 2 +- .../search/intervals/TermIntervalsSource.java | 29 +- 8 files changed, 481 insertions(+), 79 deletions(-) create mode 100644 lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionDISI.java diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionDISI.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionDISI.java new file mode 100644 index 000000000000..cb5963d75566 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionDISI.java @@ -0,0 +1,307 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.search.intervals; + + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.spans.Spans; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSet; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.CollectionUtil; + +/** A conjunction of DocIdSetIterators. + * This iterates over the doc ids that are present in each given DocIdSetIterator. + *
    Package-private: intended for use only within {@code org.apache.lucene.search.intervals}. + * @lucene.internal + */ +final class ConjunctionDISI extends DocIdSetIterator { + + /** Create a conjunction over the provided DocIdSetIterators. Note that the + * returned {@link DocIdSetIterator} might leverage two-phase iteration in + * which case it is possible to retrieve the {@link TwoPhaseIterator} using + * {@link TwoPhaseIterator#unwrap}. */ + public static DocIdSetIterator intersectIterators(List iterators) { + if (iterators.size() < 2) { + throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators"); + } + final List allIterators = new ArrayList<>(); + final List twoPhaseIterators = new ArrayList<>(); + for (DocIdSetIterator iterator : iterators) { + addIterator(iterator, allIterators, twoPhaseIterators); + } + + return createConjunction(allIterators, twoPhaseIterators); + } + + private static void addIterator(DocIdSetIterator disi, List allIterators, List twoPhaseIterators) { + TwoPhaseIterator twoPhase = TwoPhaseIterator.unwrap(disi); + if (twoPhase != null) { + addTwoPhaseIterator(twoPhase, allIterators, twoPhaseIterators); + } else if (disi.getClass() == ConjunctionDISI.class) { // Check for exactly this class for collapsing + ConjunctionDISI conjunction = (ConjunctionDISI) disi; + // subconjunctions have already split themselves into two phase iterators and others, so we can take those + // iterators as they are and move them up to this conjunction + allIterators.add(conjunction.lead1); + allIterators.add(conjunction.lead2); + Collections.addAll(allIterators, conjunction.others); + } else if (disi.getClass() == BitSetConjunctionDISI.class) { + BitSetConjunctionDISI conjunction = (BitSetConjunctionDISI) disi; + allIterators.add(conjunction.lead); + Collections.addAll(allIterators, conjunction.bitSetIterators); + } else { + allIterators.add(disi); + } + } + + private static void addTwoPhaseIterator(TwoPhaseIterator twoPhaseIter, List allIterators, List
twoPhaseIterators) { + addIterator(twoPhaseIter.approximation(), allIterators, twoPhaseIterators); + if (twoPhaseIter.getClass() == ConjunctionTwoPhaseIterator.class) { // Check for exactly this class for collapsing + Collections.addAll(twoPhaseIterators, ((ConjunctionTwoPhaseIterator) twoPhaseIter).twoPhaseIterators); + } else { + twoPhaseIterators.add(twoPhaseIter); + } + } + + private static DocIdSetIterator createConjunction( + List allIterators, + List twoPhaseIterators) { + long minCost = allIterators.stream().mapToLong(DocIdSetIterator::cost).min().getAsLong(); + List bitSetIterators = new ArrayList<>(); + List iterators = new ArrayList<>(); + for (DocIdSetIterator iterator : allIterators) { + if (iterator.cost() > minCost && iterator instanceof BitSetIterator) { + // we put all bitset iterators into bitSetIterators + // except if they have the minimum cost, since we need + // them to lead the iteration in that case + bitSetIterators.add((BitSetIterator) iterator); + } else { + iterators.add(iterator); + } + } + + DocIdSetIterator disi; + if (iterators.size() == 1) { + disi = iterators.get(0); + } else { + disi = new ConjunctionDISI(iterators); + } + + if (bitSetIterators.size() > 0) { + disi = new BitSetConjunctionDISI(disi, bitSetIterators); + } + + if (twoPhaseIterators.isEmpty() == false) { + disi = TwoPhaseIterator.asDocIdSetIterator(new ConjunctionTwoPhaseIterator(disi, twoPhaseIterators)); + } + + return disi; + } + + final DocIdSetIterator lead1, lead2; + final DocIdSetIterator[] others; + + private ConjunctionDISI(List iterators) { + assert iterators.size() >= 2; + // Sort the array the first time to allow the least frequent DocsEnum to + // lead the matching. 
+ CollectionUtil.timSort(iterators, new Comparator() { + @Override + public int compare(DocIdSetIterator o1, DocIdSetIterator o2) { + return Long.compare(o1.cost(), o2.cost()); + } + }); + lead1 = iterators.get(0); + lead2 = iterators.get(1); + others = iterators.subList(2, iterators.size()).toArray(new DocIdSetIterator[0]); + } + + private int doNext(int doc) throws IOException { + advanceHead: for(;;) { + assert doc == lead1.docID(); + + // find agreement between the two iterators with the lower costs + // we special case them because they do not need the + // 'other.docID() < doc' check that the 'others' iterators need + final int next2 = lead2.advance(doc); + if (next2 != doc) { + doc = lead1.advance(next2); + if (next2 != doc) { + continue; + } + } + + // then find agreement with other iterators + for (DocIdSetIterator other : others) { + // other.doc may already be equal to doc if we "continued advanceHead" + // on the previous iteration and the advance on the lead scorer exactly matched. + if (other.docID() < doc) { + final int next = other.advance(doc); + + if (next > doc) { + // iterator beyond the current doc - advance lead and continue to the new highest doc. + doc = lead1.advance(next); + continue advanceHead; + } + } + } + + // success - all iterators are on the same doc + return doc; + } + } + + @Override + public int advance(int target) throws IOException { + return doNext(lead1.advance(target)); + } + + @Override + public int docID() { + return lead1.docID(); + } + + @Override + public int nextDoc() throws IOException { + return doNext(lead1.nextDoc()); + } + + @Override + public long cost() { + return lead1.cost(); // overestimate + } + + /** Conjunction between a {@link DocIdSetIterator} and one or more {@link BitSetIterator}s. 
*/ + private static class BitSetConjunctionDISI extends DocIdSetIterator { + + private final DocIdSetIterator lead; + private final BitSetIterator[] bitSetIterators; + private final BitSet[] bitSets; + private final int minLength; + + BitSetConjunctionDISI(DocIdSetIterator lead, Collection bitSetIterators) { + this.lead = lead; + assert bitSetIterators.size() > 0; + this.bitSetIterators = bitSetIterators.toArray(new BitSetIterator[0]); + // Put the least costly iterators first so that we exit as soon as possible + ArrayUtil.timSort(this.bitSetIterators, (a, b) -> Long.compare(a.cost(), b.cost())); + this.bitSets = new BitSet[this.bitSetIterators.length]; + int minLen = Integer.MAX_VALUE; + for (int i = 0; i < this.bitSetIterators.length; ++i) { + BitSet bitSet = this.bitSetIterators[i].getBitSet(); + this.bitSets[i] = bitSet; + minLen = Math.min(minLen, bitSet.length()); + } + this.minLength = minLen; + } + + @Override + public int docID() { + return lead.docID(); + } + + @Override + public int nextDoc() throws IOException { + return doNext(lead.nextDoc()); + } + + @Override + public int advance(int target) throws IOException { + return doNext(lead.advance(target)); + } + + private int doNext(int doc) throws IOException { + advanceLead: for (;; doc = lead.nextDoc()) { + if (doc >= minLength) { + return NO_MORE_DOCS; + } + for (BitSet bitSet : bitSets) { + if (bitSet.get(doc) == false) { + continue advanceLead; + } + } + for (BitSetIterator iterator : bitSetIterators) { + iterator.setDocId(doc); + } + return doc; + } + } + + @Override + public long cost() { + return lead.cost(); + } + + } + + /** + * {@link TwoPhaseIterator} implementing a conjunction. 
+ */ + private static final class ConjunctionTwoPhaseIterator extends TwoPhaseIterator { + + private final TwoPhaseIterator[] twoPhaseIterators; + private final float matchCost; + + private ConjunctionTwoPhaseIterator(DocIdSetIterator approximation, + List twoPhaseIterators) { + super(approximation); + assert twoPhaseIterators.size() > 0; + + CollectionUtil.timSort(twoPhaseIterators, new Comparator() { + @Override + public int compare(TwoPhaseIterator o1, TwoPhaseIterator o2) { + return Float.compare(o1.matchCost(), o2.matchCost()); + } + }); + + this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseIterator[twoPhaseIterators.size()]); + + // Compute the matchCost as the total matchCost of the sub iterators. + // TODO: This could be too high because the matching is done cheapest first: give the lower matchCosts a higher weight. + float totalMatchCost = 0; + for (TwoPhaseIterator tpi : twoPhaseIterators) { + totalMatchCost += tpi.matchCost(); + } + matchCost = totalMatchCost; + } + + @Override + public boolean matches() throws IOException { + for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) { // match cheapest first + if (twoPhaseIterator.matches() == false) { + return false; + } + } + return true; + } + + @Override + public float matchCost() { + return matchCost; + } + + } + +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalIterator.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalIterator.java index 29d617cfd590..8efe4e43b556 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalIterator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalIterator.java @@ -17,18 +17,19 @@ package org.apache.lucene.search.intervals; +import java.io.IOException; import java.util.List; -import org.apache.lucene.search.ConjunctionDISI; +import org.apache.lucene.search.DocIdSetIterator; abstract class 
ConjunctionIntervalIterator extends IntervalIterator { + final DocIdSetIterator approximation; final List subIterators; - final float cost; ConjunctionIntervalIterator(List subIterators) { - super(ConjunctionDISI.intersectIterators(subIterators)); + this.approximation = ConjunctionDISI.intersectIterators(subIterators); this.subIterators = subIterators; float costsum = 0; for (IntervalIterator it : subIterators) { @@ -37,6 +38,32 @@ abstract class ConjunctionIntervalIterator extends IntervalIterator { this.cost = costsum; } + @Override + public int docID() { + return approximation.docID(); + } + + @Override + public int nextDoc() throws IOException { + int doc = approximation.nextDoc(); + reset(); + return doc; + } + + @Override + public int advance(int target) throws IOException { + int doc = approximation.advance(target); + reset(); + return doc; + } + + protected abstract void reset() throws IOException; + + @Override + public long cost() { + return approximation.cost(); + } + @Override public final float matchCost() { return cost; diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java index 934869490006..18d4d677e536 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java @@ -81,12 +81,34 @@ private static abstract class RelativeIterator extends IntervalIterator { boolean bpos; RelativeIterator(IntervalIterator a, IntervalIterator b) { - super(a); this.a = a; this.b = b; } @Override + public int docID() { + return a.docID(); + } + + @Override + public int nextDoc() throws IOException { + int doc = a.nextDoc(); + reset(); + return doc; + } + + @Override + public int advance(int target) throws IOException { + int doc = a.advance(target); + reset(); + return doc; + } + + @Override + 
public long cost() { + return a.cost(); + } + protected void reset() throws IOException { int doc = a.docID(); bpos = b.docID() == doc || @@ -165,7 +187,29 @@ public int hashCode() { @Override public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - IntervalIterator notWithin = new IntervalIterator(subtrahend) { + IntervalIterator notWithin = new IntervalIterator() { + + @Override + public int docID() { + return subtrahend.docID(); + } + + @Override + public int nextDoc() throws IOException { + positioned = false; + return subtrahend.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + positioned = false; + return subtrahend.advance(target); + } + + @Override + public long cost() { + return subtrahend.cost(); + } boolean positioned = false; @@ -201,12 +245,6 @@ public float matchCost() { return subtrahend.matchCost(); } - @Override - protected void reset() throws IOException { - // already called when the subtrahend approximation is advanced - positioned = false; - } - }; return NON_OVERLAPPING.apply(minuend, notWithin); } diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java index 1a5d3063f105..31b7e7d3404a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java @@ -78,6 +78,7 @@ public void extractTerms(String field, Set terms) { private static class DisjunctionIntervalIterator extends IntervalIterator { + final DocIdSetIterator approximation; final PriorityQueue intervalQueue; final DisiPriorityQueue disiQueue; final List iterators; @@ -86,8 +87,11 @@ private static class DisjunctionIntervalIterator extends IntervalIterator { IntervalIterator current = EMPTY; DisjunctionIntervalIterator(List iterators) { - 
super(buildApproximation(iterators)); - this.disiQueue = ((DisjunctionDISIApproximation)approximation).subIterators; + this.disiQueue = new DisiPriorityQueue(iterators.size()); + for (IntervalIterator it : iterators) { + disiQueue.add(new DisiWrapper(it)); + } + this.approximation = new DisjunctionDISIApproximation(disiQueue); this.iterators = iterators; this.intervalQueue = new PriorityQueue(iterators.size()) { @Override @@ -105,14 +109,6 @@ protected boolean lessThan(IntervalIterator a, IntervalIterator b) { this.matchCost = costsum; } - private static DocIdSetIterator buildApproximation(List iterators) { - DisiPriorityQueue disiQueue = new DisiPriorityQueue(iterators.size()); - for (IntervalIterator it : iterators) { - disiQueue.add(new DisiWrapper(it)); - } - return new DisjunctionDISIApproximation(disiQueue); - } - @Override public float matchCost() { return matchCost; @@ -128,8 +124,7 @@ public int end() { return current.end(); } - @Override - protected void reset() throws IOException { + private void reset() throws IOException { intervalQueue.clear(); for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { dw.intervals.nextInterval(); @@ -165,9 +160,52 @@ private boolean contains(IntervalIterator it, int start, int end) { return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); } + @Override + public int docID() { + return approximation.docID(); + } + + @Override + public int nextDoc() throws IOException { + int doc = approximation.nextDoc(); + reset(); + return doc; + } + + @Override + public int advance(int target) throws IOException { + int doc = approximation.advance(target); + reset(); + return doc; + } + + @Override + public long cost() { + return approximation.cost(); + } } - private static final IntervalIterator EMPTY = new IntervalIterator(DocIdSetIterator.empty()) { + private static final IntervalIterator EMPTY = new IntervalIterator() { + + @Override + public int docID() { + throw new 
UnsupportedOperationException(); + } + + @Override + public int nextDoc() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int advance(int target) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long cost() { + throw new UnsupportedOperationException(); + } @Override public int start() { @@ -188,11 +226,6 @@ public int nextInterval() { public float matchCost() { return 0; } - - @Override - protected void reset() throws IOException { - - } }; } diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFilter.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFilter.java index 10fcc03430fe..d1d2fcf9bfb5 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFilter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFilter.java @@ -30,10 +30,29 @@ public abstract class IntervalFilter extends IntervalIterator { * Create a new filter */ public IntervalFilter(IntervalIterator in) { - super(in.approximation); this.in = in; } + @Override + public int docID() { + return in.docID(); + } + + @Override + public int nextDoc() throws IOException { + return in.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + return in.advance(target); + } + + @Override + public long cost() { + return in.cost(); + } + @Override public int start() { return in.start(); @@ -49,11 +68,6 @@ public float matchCost() { return in.matchCost(); } - @Override - protected void reset() throws IOException { - in.reset(); - } - /** * @return {@code true} if the wrapped iterator's interval should be passed on */ diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java index f31b2d7abb2c..242872001d89 100644 --- 
a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java @@ -39,42 +39,12 @@ */ public abstract class IntervalIterator extends DocIdSetIterator { - protected final DocIdSetIterator approximation; - - protected IntervalIterator(DocIdSetIterator approximation) { - this.approximation = approximation; - } - /** * When returned from {@link #nextInterval()}, indicates that there are no more * matching intervals on the current document */ public static final int NO_MORE_INTERVALS = Integer.MAX_VALUE; - @Override - public final int docID() { - return approximation.docID(); - } - - @Override - public final int nextDoc() throws IOException { - int doc = approximation.nextDoc(); - reset(); - return doc; - } - - @Override - public final int advance(int target) throws IOException { - int doc = approximation.advance(target); - reset(); - return doc; - } - - @Override - public final long cost() { - return approximation.cost(); - } - /** * The start of the current interval * @@ -104,14 +74,4 @@ public final long cost() { */ public abstract float matchCost(); - /** - * Called when the underlying iterator has been advanced. 
- */ - protected abstract void reset() throws IOException; - - @Override - public String toString() { - return approximation.docID() + ":[" + start() + "->" + end() + "]"; - } - } diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/LowpassIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/LowpassIntervalsSource.java index 43fdf7596c02..3bb469ebd79f 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/LowpassIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/LowpassIntervalsSource.java @@ -27,7 +27,7 @@ class LowpassIntervalsSource extends IntervalsSource { final IntervalsSource in; - final int maxWidth; + private final int maxWidth; LowpassIntervalsSource(IntervalsSource in, int maxWidth) { this.in = in; diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java index a1b94607f0b9..84e558bb09ec 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java @@ -54,7 +54,31 @@ public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IO te.seekExact(term); PostingsEnum pe = te.postings(null, PostingsEnum.POSITIONS); float cost = termPositionsCost(te); - return new IntervalIterator(pe) { + return new IntervalIterator() { + + @Override + public int docID() { + return pe.docID(); + } + + @Override + public int nextDoc() throws IOException { + int doc = pe.nextDoc(); + reset(); + return doc; + } + + @Override + public int advance(int target) throws IOException { + int doc = pe.advance(target); + reset(); + return doc; + } + + @Override + public long cost() { + return pe.cost(); + } int pos = -1, upto; @@ -81,8 +105,7 @@ public float matchCost() { return cost; } - @Override - protected void reset() throws 
IOException { + private void reset() throws IOException { if (pe.docID() == NO_MORE_DOCS) { upto = -1; pos = NO_MORE_INTERVALS; From f6f54e6ae5db6e8e2fe12d14e5f87ea5b3421e37 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 16 Mar 2018 17:53:57 +0000 Subject: [PATCH 80/83] cleanup --- .../apache/lucene/search/ConjunctionDISI.java | 2 +- .../org/apache/lucene/search/DisiWrapper.java | 2 - .../ConjunctionIntervalIterator.java | 45 --- .../intervals/ConjunctionIntervalsSource.java | 77 ---- .../intervals/DifferenceIntervalFunction.java | 288 -------------- .../intervals/DifferenceIntervalsSource.java | 74 ---- .../lucene/intervals/DisiPriorityQueue.java | 171 -------- .../apache/lucene/intervals/DisiWrapper.java | 49 --- .../DisjunctionDISIApproximation.java | 76 ---- .../intervals/DisjunctionIntervalsSource.java | 198 ---------- .../lucene/intervals/IntervalFilter.java | 72 ---- .../lucene/intervals/IntervalFunction.java | 368 ------------------ .../lucene/intervals/IntervalIterator.java | 117 ------ .../lucene/intervals/IntervalQuery.java | 155 -------- .../lucene/intervals/IntervalScorer.java | 105 ----- .../apache/lucene/intervals/Intervals.java | 188 --------- .../lucene/intervals/IntervalsSource.java | 61 --- .../intervals/LowpassIntervalsSource.java | 71 ---- .../lucene/intervals/TermIntervalsSource.java | 160 -------- 19 files changed, 1 insertion(+), 2278 deletions(-) delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalIterator.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalsSource.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalFunction.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalsSource.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java 
delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionIntervalsSource.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFilter.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFunction.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalIterator.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalQuery.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalScorer.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/Intervals.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalsSource.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/LowpassIntervalsSource.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/intervals/TermIntervalsSource.java diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java index 8ed42316a09f..780e854033a8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java @@ -58,7 +58,7 @@ public static DocIdSetIterator intersectScorers(Collection scorers) { * returned {@link DocIdSetIterator} might leverage two-phase iteration in * which case it is possible to retrieve the {@link TwoPhaseIterator} using * {@link TwoPhaseIterator#unwrap}. 
*/ - public static DocIdSetIterator intersectIterators(List iterators) { + public static DocIdSetIterator intersectIterators(List iterators) { if (iterators.size() < 2) { throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index 5fa01d1c3e4c..fac9418010f4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -81,7 +81,5 @@ public DisiWrapper(Spans spans) { this.lastApproxNonMatchDoc = -2; this.lastApproxMatchDoc = -2; } - - } diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalIterator.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalIterator.java deleted file mode 100644 index 3fd9daed2de9..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalIterator.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.lucene.intervals; - -import java.util.List; - -import org.apache.lucene.search.ConjunctionDISI; - -abstract class ConjunctionIntervalIterator extends IntervalIterator { - - final List subIterators; - - final float cost; - - ConjunctionIntervalIterator(List subIterators) { - super(ConjunctionDISI.intersectIterators(subIterators)); - this.subIterators = subIterators; - float costsum = 0; - for (IntervalIterator it : subIterators) { - costsum += it.matchCost(); - } - this.cost = costsum; - } - - @Override - public final float matchCost() { - return cost; - } - -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalsSource.java deleted file mode 100644 index 30874d96d5e6..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/ConjunctionIntervalsSource.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.lucene.intervals; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import java.util.Set; -import java.util.stream.Collectors; - -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; - -class ConjunctionIntervalsSource extends IntervalsSource { - - final List subSources; - final IntervalFunction function; - - ConjunctionIntervalsSource(List subSources, IntervalFunction function) { - this.subSources = subSources; - this.function = function; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - ConjunctionIntervalsSource that = (ConjunctionIntervalsSource) o; - return Objects.equals(subSources, that.subSources) && - Objects.equals(function, that.function); - } - - @Override - public String toString() { - return function + subSources.stream().map(Object::toString).collect(Collectors.joining(",", "(", ")")); - } - - @Override - public void extractTerms(String field, Set terms) { - for (IntervalsSource source : subSources) { - source.extractTerms(field, terms); - } - } - - @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { - List subIntervals = new ArrayList<>(); - for (IntervalsSource source : subSources) { - IntervalIterator it = source.intervals(field, ctx); - if (it == null) - return null; - subIntervals.add(it); - } - return function.apply(subIntervals); - } - - @Override - public int hashCode() { - return Objects.hash(subSources, function); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalFunction.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalFunction.java deleted file mode 100644 index 10c697775abf..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalFunction.java +++ /dev/null @@ -1,288 
+0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.intervals; - -import java.io.IOException; -import java.util.Objects; - -/** - * A function that takes two interval iterators and combines them to produce a third, - * generally by computing a difference interval between them - */ -abstract class DifferenceIntervalFunction { - - @Override - public abstract int hashCode(); - - @Override - public abstract boolean equals(Object obj); - - @Override - public abstract String toString(); - - /** - * Combine two interval iterators into a third - */ - public abstract IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend); - - /** - * Filters the minuend iterator so that only intervals that do not overlap intervals from the - * subtrahend iterator are returned - */ - static final DifferenceIntervalFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") { - @Override - public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - return new NonOverlappingIterator(minuend, subtrahend); - } - }; - - /** - * Filters the minuend iterator so that only intervals that do not contain intervals from the - * subtrahend iterator are returned - 
*/ - static final DifferenceIntervalFunction NOT_CONTAINING = new SingletonFunction("NOT_CONTAINING") { - @Override - public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - return new NotContainingIterator(minuend, subtrahend); - } - }; - - /** - * Filters the minuend iterator so that only intervals that are not contained by intervals from - * the subtrahend iterator are returned - */ - static final DifferenceIntervalFunction NOT_CONTAINED_BY = new SingletonFunction("NOT_CONTAINED_BY") { - @Override - public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - return new NotContainedByIterator(minuend, subtrahend); - } - }; - - private static abstract class RelativeIterator extends IntervalIterator { - - final IntervalIterator a; - final IntervalIterator b; - - boolean bpos; - - RelativeIterator(IntervalIterator a, IntervalIterator b) { - super(a); - this.a = a; - this.b = b; - } - - @Override - protected void reset() throws IOException { - int doc = a.docID(); - bpos = b.docID() == doc || - (b.docID() < doc && b.advance(doc) == doc); - } - - @Override - public int start() { - return a.start(); - } - - @Override - public int end() { - return a.end(); - } - - @Override - public float matchCost() { - return a.matchCost() + b.matchCost(); - } - } - - private static class NonOverlappingIterator extends RelativeIterator { - - private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtrahend) { - super(minuend, subtrahend); - } - - @Override - public int nextInterval() throws IOException { - if (bpos == false) - return a.nextInterval(); - while (a.nextInterval() != NO_MORE_INTERVALS) { - while (b.end() < a.start()) { - if (b.nextInterval() == NO_MORE_INTERVALS) { - bpos = false; - return a.start(); - } - } - if (b.start() > a.end()) - return a.start(); - } - return NO_MORE_INTERVALS; - } - } - - /** - * Filters the minuend iterator so that only intervals that do not occur within a set 
number - * of positions of intervals from the subtrahend iterator are returned - */ - static class NotWithinFunction extends DifferenceIntervalFunction { - - private final int positions; - - NotWithinFunction(int positions) { - this.positions = positions; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - NotWithinFunction that = (NotWithinFunction) o; - return positions == that.positions; - } - - @Override - public String toString() { - return "NOTWITHIN/" + positions; - } - - @Override - public int hashCode() { - return Objects.hash(positions); - } - - @Override - public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) { - IntervalIterator notWithin = new IntervalIterator(subtrahend) { - - boolean positioned = false; - - @Override - public int start() { - if (positioned == false) - return -1; - int start = subtrahend.start(); - return Math.max(0, start - positions); - } - - @Override - public int end() { - if (positioned == false) - return -1; - int end = subtrahend.end(); - int newEnd = end + positions; - if (newEnd < 0) // check for overflow - return Integer.MAX_VALUE; - return newEnd; - } - - @Override - public int nextInterval() throws IOException { - if (positioned == false) { - positioned = true; - } - return subtrahend.nextInterval(); - } - - @Override - public float matchCost() { - return subtrahend.matchCost(); - } - - @Override - protected void reset() throws IOException { - // already called when the subtrahend approximation is advanced - positioned = false; - } - - }; - return NON_OVERLAPPING.apply(minuend, notWithin); - } - } - - private static class NotContainingIterator extends RelativeIterator { - - private NotContainingIterator(IntervalIterator minuend, IntervalIterator subtrahend) { - super(minuend, subtrahend); - } - - @Override - public int nextInterval() throws IOException { - if (bpos == false) - return 
a.nextInterval(); - while (a.nextInterval() != NO_MORE_INTERVALS) { - while (b.start() < a.start() && b.end() < a.end()) { - if (b.nextInterval() == NO_MORE_INTERVALS) { - bpos = false; - return a.start(); - } - } - if (b.start() > a.end()) - return a.start(); - } - return NO_MORE_INTERVALS; - } - - } - - private static class NotContainedByIterator extends RelativeIterator { - - NotContainedByIterator(IntervalIterator a, IntervalIterator b) { - super(a, b); - } - - @Override - public int nextInterval() throws IOException { - if (bpos == false) - return a.nextInterval(); - while (a.nextInterval() != NO_MORE_INTERVALS) { - while (b.end() < a.end()) { - if (b.nextInterval() == NO_MORE_INTERVALS) - return a.start(); - } - if (a.start() < b.start()) - return a.start(); - } - return NO_MORE_INTERVALS; - } - } - - private static abstract class SingletonFunction extends DifferenceIntervalFunction { - - private final String name; - - SingletonFunction(String name) { - this.name = name; - } - - @Override - public int hashCode() { - return System.identityHashCode(this); - } - - @Override - public boolean equals(Object obj) { - return obj == this; - } - - @Override - public String toString() { - return name; - } - - } - - -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalsSource.java deleted file mode 100644 index 03ebdd5134cb..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/DifferenceIntervalsSource.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.intervals; - -import java.io.IOException; -import java.util.Objects; -import java.util.Set; - -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; - -class DifferenceIntervalsSource extends IntervalsSource { - - final IntervalsSource minuend; - final IntervalsSource subtrahend; - final DifferenceIntervalFunction function; - - public DifferenceIntervalsSource(IntervalsSource minuend, IntervalsSource subtrahend, DifferenceIntervalFunction function) { - this.minuend = minuend; - this.subtrahend = subtrahend; - this.function = function; - } - - @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { - IntervalIterator minIt = minuend.intervals(field, ctx); - if (minIt == null) - return null; - IntervalIterator subIt = subtrahend.intervals(field, ctx); - if (subIt == null) - return minIt; - return function.apply(minIt, subIt); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - DifferenceIntervalsSource that = (DifferenceIntervalsSource) o; - return Objects.equals(minuend, that.minuend) && - Objects.equals(subtrahend, that.subtrahend) && - Objects.equals(function, that.function); - } - - @Override - public int hashCode() { - return Objects.hash(minuend, subtrahend, function); - } - - @Override - public String toString() { - return function + "(" + minuend + ", " + subtrahend + ")"; - } - - @Override - public void extractTerms(String field, Set 
terms) { - minuend.extractTerms(field, terms); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java deleted file mode 100644 index 4f1e5c9511b1..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiPriorityQueue.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.intervals; - - -import java.util.Arrays; -import java.util.Iterator; - -import org.apache.lucene.util.PriorityQueue; - -/** - * A priority queue of DocIdSetIterators that orders by current doc ID. - * This specialization is needed over {@link PriorityQueue} because the - * pluggable comparison function makes the rebalancing quite slow. 
- * @lucene.internal - */ -public final class DisiPriorityQueue implements Iterable { - - static int leftNode(int node) { - return ((node + 1) << 1) - 1; - } - - static int rightNode(int leftNode) { - return leftNode + 1; - } - - static int parentNode(int node) { - return ((node + 1) >>> 1) - 1; - } - - private final DisiWrapper[] heap; - private int size; - - public DisiPriorityQueue(int maxSize) { - heap = new DisiWrapper[maxSize]; - size = 0; - } - - public int size() { - return size; - } - - public DisiWrapper top() { - return heap[0]; - } - - /** Get the list of scorers which are on the current doc. */ - public DisiWrapper topList() { - final DisiWrapper[] heap = this.heap; - final int size = this.size; - DisiWrapper list = heap[0]; - list.next = null; - if (size >= 3) { - list = topList(list, heap, size, 1); - list = topList(list, heap, size, 2); - } else if (size == 2 && heap[1].doc == list.doc) { - list = prepend(heap[1], list); - } - return list; - } - - // prepend w1 (iterator) to w2 (list) - private DisiWrapper prepend(DisiWrapper w1, DisiWrapper w2) { - w1.next = w2; - return w1; - } - - private DisiWrapper topList(DisiWrapper list, DisiWrapper[] heap, - int size, int i) { - final DisiWrapper w = heap[i]; - if (w.doc == list.doc) { - list = prepend(w, list); - final int left = leftNode(i); - final int right = left + 1; - if (right < size) { - list = topList(list, heap, size, left); - list = topList(list, heap, size, right); - } else if (left < size && heap[left].doc == list.doc) { - list = prepend(heap[left], list); - } - } - return list; - } - - public DisiWrapper add(DisiWrapper entry) { - final DisiWrapper[] heap = this.heap; - final int size = this.size; - heap[size] = entry; - upHeap(size); - this.size = size + 1; - return heap[0]; - } - - public DisiWrapper pop() { - final DisiWrapper[] heap = this.heap; - final DisiWrapper result = heap[0]; - final int i = --size; - heap[0] = heap[i]; - heap[i] = null; - downHeap(i); - return result; - } - - 
public DisiWrapper updateTop() { - downHeap(size); - return heap[0]; - } - - DisiWrapper updateTop(DisiWrapper topReplacement) { - heap[0] = topReplacement; - return updateTop(); - } - - void upHeap(int i) { - final DisiWrapper node = heap[i]; - final int nodeDoc = node.doc; - int j = parentNode(i); - while (j >= 0 && nodeDoc < heap[j].doc) { - heap[i] = heap[j]; - i = j; - j = parentNode(j); - } - heap[i] = node; - } - - void downHeap(int size) { - int i = 0; - final DisiWrapper node = heap[0]; - int j = leftNode(i); - if (j < size) { - int k = rightNode(j); - if (k < size && heap[k].doc < heap[j].doc) { - j = k; - } - if (heap[j].doc < node.doc) { - do { - heap[i] = heap[j]; - i = j; - j = leftNode(i); - k = rightNode(j); - if (k < size && heap[k].doc < heap[j].doc) { - j = k; - } - } while (j < size && heap[j].doc < node.doc); - heap[i] = node; - } - } - } - - @Override - public Iterator iterator() { - return Arrays.asList(heap).subList(0, size).iterator(); - } - -} - - diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java deleted file mode 100644 index 78c2cc215546..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisiWrapper.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.intervals; - -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.TwoPhaseIterator; - -public class DisiWrapper { - - public final DocIdSetIterator iterator; - public final IntervalIterator intervals; - public final long cost; - public final float matchCost; // the match cost for two-phase iterators, 0 otherwise - public int doc; // the current doc, used for comparison - public DisiWrapper next; // reference to a next element, see #topList - - // An approximation of the iterator, or the iterator itself if it does not - // support two-phase iteration - public final DocIdSetIterator approximation; - // A two-phase view of the iterator, or null if the iterator does not support - // two-phase iteration - public final TwoPhaseIterator twoPhaseView; - - public DisiWrapper(IntervalIterator iterator) { - this.intervals = iterator; - this.iterator = iterator; - this.cost = iterator.cost(); - this.doc = -1; - this.twoPhaseView = null; - this.approximation = iterator; - this.matchCost = iterator.matchCost(); - } - -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java deleted file mode 100644 index 30ab9d4bc676..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionDISIApproximation.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.intervals; - -import java.io.IOException; - -import org.apache.lucene.search.DocIdSetIterator; - -/** - * A {@link DocIdSetIterator} which is a disjunction of the approximations of - * the provided iterators. - * @lucene.internal - */ -class DisjunctionDISIApproximation extends DocIdSetIterator { - - final DisiPriorityQueue subIterators; - final long cost; - - public DisjunctionDISIApproximation(DisiPriorityQueue subIterators) { - this.subIterators = subIterators; - long cost = 0; - for (DisiWrapper w : subIterators) { - cost += w.cost; - } - this.cost = cost; - } - - @Override - public long cost() { - return cost; - } - - @Override - public int docID() { - return subIterators.top().doc; - } - - @Override - public int nextDoc() throws IOException { - DisiWrapper top = subIterators.top(); - final int doc = top.doc; - do { - top.doc = top.approximation.nextDoc(); - top = subIterators.updateTop(); - } while (top.doc == doc); - - return top.doc; - } - - @Override - public int advance(int target) throws IOException { - DisiWrapper top = subIterators.top(); - do { - top.doc = top.approximation.advance(target); - top = subIterators.updateTop(); - } while (top.doc < target); - - return top.doc; - } -} - - diff --git 
a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionIntervalsSource.java deleted file mode 100644 index f1b2381b3694..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/DisjunctionIntervalsSource.java +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.lucene.intervals; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import java.util.Set; -import java.util.stream.Collectors; - -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.PriorityQueue; - -class DisjunctionIntervalsSource extends IntervalsSource { - - final List subSources; - - public DisjunctionIntervalsSource(List subSources) { - this.subSources = subSources; - } - - @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { - List subIterators = new ArrayList<>(); - for (IntervalsSource subSource : subSources) { - IntervalIterator it = subSource.intervals(field, ctx); - if (it != null) { - subIterators.add(it); - } - } - if (subIterators.size() == 0) - return null; - return new DisjunctionIntervalIterator(subIterators); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - DisjunctionIntervalsSource that = (DisjunctionIntervalsSource) o; - return Objects.equals(subSources, that.subSources); - } - - @Override - public int hashCode() { - return Objects.hash(subSources); - } - - @Override - public String toString() { - return subSources.stream().map(Object::toString).collect(Collectors.joining(",", "or(", ")")); - } - - @Override - public void extractTerms(String field, Set terms) { - for (IntervalsSource source : subSources) { - source.extractTerms(field, terms); - } - } - - private static class DisjunctionIntervalIterator extends IntervalIterator { - - final PriorityQueue intervalQueue; - final DisiPriorityQueue disiQueue; - final List iterators; - final float matchCost; - - IntervalIterator current = EMPTY; - - DisjunctionIntervalIterator(List iterators) { - super(buildApproximation(iterators)); - this.disiQueue 
= ((DisjunctionDISIApproximation)approximation).subIterators; - this.iterators = iterators; - this.intervalQueue = new PriorityQueue(iterators.size()) { - @Override - protected boolean lessThan(IntervalIterator a, IntervalIterator b) { - // This is different to the Vigna paper, because we're interested in matching rather - // than in minimizing intervals, so a wider interval should sort before its prefixes - return a.start() < b.start() || (a.start() == b.start() && a.end() > b.end()); - //return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); - } - }; - float costsum = 0; - for (IntervalIterator it : iterators) { - costsum += it.cost(); - } - this.matchCost = costsum; - } - - private static DocIdSetIterator buildApproximation(List iterators) { - DisiPriorityQueue disiQueue = new DisiPriorityQueue(iterators.size()); - for (IntervalIterator it : iterators) { - disiQueue.add(new DisiWrapper(it)); - } - return new DisjunctionDISIApproximation(disiQueue); - } - - @Override - public float matchCost() { - return matchCost; - } - - @Override - public int start() { - return current.start(); - } - - @Override - public int end() { - return current.end(); - } - - @Override - protected void reset() throws IOException { - intervalQueue.clear(); - for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { - dw.intervals.nextInterval(); - intervalQueue.add(dw.intervals); - } - current = EMPTY; - } - - @Override - public int nextInterval() throws IOException { - if (current == EMPTY) { - if (intervalQueue.size() > 0) { - current = intervalQueue.top(); - } - return current.start(); - } - int start = current.start(), end = current.end(); - while (intervalQueue.size() > 0 && contains(intervalQueue.top(), start, end)) { - IntervalIterator it = intervalQueue.pop(); - if (it != null && it.nextInterval() != NO_MORE_INTERVALS) { - intervalQueue.add(it); - } - } - if (intervalQueue.size() == 0) { - current = EMPTY; - return NO_MORE_INTERVALS; - } - current 
= intervalQueue.top(); - return current.start(); - } - - private boolean contains(IntervalIterator it, int start, int end) { - return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end(); - } - - } - - private static final IntervalIterator EMPTY = new IntervalIterator(DocIdSetIterator.empty()) { - - @Override - public int start() { - return -1; - } - - @Override - public int end() { - return -1; - } - - @Override - public int nextInterval() { - return NO_MORE_INTERVALS; - } - - @Override - public float matchCost() { - return 0; - } - - @Override - protected void reset() throws IOException { - - } - }; - -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFilter.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFilter.java deleted file mode 100644 index 47fea70f312f..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFilter.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.lucene.intervals; - -import java.io.IOException; - -/** - * Wraps an {@link IntervalIterator} and passes through those intervals that match the {@link #accept()} function - */ -public abstract class IntervalFilter extends IntervalIterator { - - private final IntervalIterator in; - - /** - * Create a new filter - */ - public IntervalFilter(IntervalIterator in) { - super(in.approximation); - this.in = in; - } - - @Override - public int start() { - return in.start(); - } - - @Override - public int end() { - return in.end(); - } - - @Override - public float matchCost() { - return in.matchCost(); - } - - @Override - protected void reset() throws IOException { - in.reset(); - } - - /** - * @return {@code true} if the wrapped iterator's interval should be passed on - */ - protected abstract boolean accept(); - - @Override - public final int nextInterval() throws IOException { - int next; - do { - next = in.nextInterval(); - } - while (accept() == false && next != IntervalIterator.NO_MORE_INTERVALS); - return next; - } - -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFunction.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFunction.java deleted file mode 100644 index 2299f152dabe..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalFunction.java +++ /dev/null @@ -1,368 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.intervals; - -import java.io.IOException; -import java.util.List; - -import org.apache.lucene.util.PriorityQueue; - -/** - * Combine a list of {@link IntervalIterator}s into another - */ -abstract class IntervalFunction { - - @Override - public abstract int hashCode(); - - @Override - public abstract boolean equals(Object obj); - - @Override - public abstract String toString(); - - /** - * Combine the iterators into another iterator - */ - public abstract IntervalIterator apply(List iterators); - - static final IntervalFunction BLOCK = new SingletonFunction("BLOCK") { - @Override - public IntervalIterator apply(List iterators) { - return new BlockIntervalIterator(iterators); - } - }; - - private static class BlockIntervalIterator extends ConjunctionIntervalIterator { - - int start = -1, end = -1; - - BlockIntervalIterator(List subIterators) { - super(subIterators); - } - - @Override - public int start() { - return start; - } - - @Override - public int end() { - return end; - } - - @Override - public int nextInterval() throws IOException { - if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) - return IntervalIterator.NO_MORE_INTERVALS; - int i = 1; - while (i < subIterators.size()) { - while (subIterators.get(i).start() <= subIterators.get(i - 1).end()) { - if (subIterators.get(i).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) - return IntervalIterator.NO_MORE_INTERVALS; - } - if (subIterators.get(i).start() == subIterators.get(i - 1).end() + 1) { - i = i + 1; - } - else { - if 
(subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) - return IntervalIterator.NO_MORE_INTERVALS; - i = 1; - } - } - start = subIterators.get(0).start(); - end = subIterators.get(subIterators.size() - 1).end(); - return start; - } - - @Override - protected void reset() { - start = end = -1; - } - } - - /** - * Return an iterator over intervals where the subiterators appear in a given order - */ - static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") { - @Override - public IntervalIterator apply(List intervalIterators) { - return new OrderedIntervalIterator(intervalIterators); - } - }; - - private static class OrderedIntervalIterator extends ConjunctionIntervalIterator { - - int start = -1, end = -1, i; - - private OrderedIntervalIterator(List subIntervals) { - super(subIntervals); - } - - @Override - public int start() { - return start; - } - - @Override - public int end() { - return end; - } - - @Override - public int nextInterval() throws IOException { - start = end = IntervalIterator.NO_MORE_INTERVALS; - int b = Integer.MAX_VALUE; - i = 1; - while (true) { - while (true) { - if (subIterators.get(i - 1).end() >= b) - return start; - if (i == subIterators.size() || subIterators.get(i).start() > subIterators.get(i - 1).end()) - break; - do { - if (subIterators.get(i).end() >= b || subIterators.get(i).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) - return start; - } - while (subIterators.get(i).start() <= subIterators.get(i - 1).end()); - i++; - } - start = subIterators.get(0).start(); - end = subIterators.get(subIterators.size() - 1).end(); - b = subIterators.get(subIterators.size() - 1).start(); - i = 1; - if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) - return start; - } - } - - @Override - protected void reset() throws IOException { - subIterators.get(0).nextInterval(); - i = 1; - start = end = -1; - } - } - - /** - * Return an iterator over intervals where the subiterators appear in 
any order - */ - static final IntervalFunction UNORDERED = new SingletonFunction("UNORDERED") { - @Override - public IntervalIterator apply(List intervalIterators) { - return new UnorderedIntervalIterator(intervalIterators); - } - }; - - private static class UnorderedIntervalIterator extends ConjunctionIntervalIterator { - - private final PriorityQueue queue; - private final IntervalIterator[] subIterators; - - int start = -1, end = -1, queueEnd; - - UnorderedIntervalIterator(List subIterators) { - super(subIterators); - this.queue = new PriorityQueue(subIterators.size()) { - @Override - protected boolean lessThan(IntervalIterator a, IntervalIterator b) { - return a.start() < b.start() || (a.start() == b.start() && a.end() >= b.end()); - } - }; - this.subIterators = new IntervalIterator[subIterators.size()]; - - for (int i = 0; i < subIterators.size(); i++) { - this.subIterators[i] = subIterators.get(i); - } - } - - @Override - public int start() { - return start; - } - - @Override - public int end() { - return end; - } - - void updateRightExtreme(IntervalIterator it) { - int itEnd = it.end(); - if (itEnd > queueEnd) { - queueEnd = itEnd; - } - } - - @Override - public int nextInterval() throws IOException { - while (this.queue.size() == subIterators.length && queue.top().start() == start) { - IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { - queue.add(it); - updateRightExtreme(it); - } - } - if (this.queue.size() < subIterators.length) - return IntervalIterator.NO_MORE_INTERVALS; - do { - start = queue.top().start(); - end = queueEnd; - if (queue.top().end() == end) - return start; - IntervalIterator it = queue.pop(); - if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { - queue.add(it); - updateRightExtreme(it); - } - } while (this.queue.size() == subIterators.length && end == queueEnd); - return start; - } - - @Override - protected void reset() throws IOException { - 
queueEnd = start = end = -1; - this.queue.clear(); - for (IntervalIterator it : subIterators) { - it.nextInterval(); - queue.add(it); - updateRightExtreme(it); - } - } - - } - - /** - * Returns an interval over iterators where the first iterator contains intervals from the second - */ - static final IntervalFunction CONTAINING = new SingletonFunction("CONTAINING") { - @Override - public IntervalIterator apply(List iterators) { - if (iterators.size() != 2) - throw new IllegalStateException("CONTAINING function requires two iterators"); - IntervalIterator a = iterators.get(0); - IntervalIterator b = iterators.get(1); - return new ConjunctionIntervalIterator(iterators) { - - boolean bpos; - - @Override - public int start() { - return a.start(); - } - - @Override - public int end() { - return a.end(); - } - - @Override - public int nextInterval() throws IOException { - if (bpos == false) - return IntervalIterator.NO_MORE_INTERVALS; - while (a.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { - while (b.start() < a.start() && b.end() < a.end()) { - if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) - return IntervalIterator.NO_MORE_INTERVALS; - } - if (a.start() <= b.start() && a.end() >= b.end()) - return a.start(); - } - return IntervalIterator.NO_MORE_INTERVALS; - } - - @Override - protected void reset() throws IOException { - bpos = true; - } - }; - } - }; - - /** - * Return an iterator over intervals where the first iterator is contained by intervals from the second - */ - static final IntervalFunction CONTAINED_BY = new SingletonFunction("CONTAINED_BY") { - @Override - public IntervalIterator apply(List iterators) { - if (iterators.size() != 2) - throw new IllegalStateException("CONTAINED_BY function requires two iterators"); - IntervalIterator a = iterators.get(0); - IntervalIterator b = iterators.get(1); - return new ConjunctionIntervalIterator(iterators) { - - boolean bpos; - - @Override - public int start() { - return a.start(); - } - - 
@Override - public int end() { - return a.end(); - } - - @Override - public int nextInterval() throws IOException { - if (bpos == false) - return IntervalIterator.NO_MORE_INTERVALS; - while (a.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { - while (b.end() < a.end()) { - if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) - return IntervalIterator.NO_MORE_INTERVALS; - } - if (b.start() <= a.start()) - return a.start(); - } - return IntervalIterator.NO_MORE_INTERVALS; - } - - @Override - protected void reset() throws IOException { - bpos = true; - } - }; - } - }; - - private static abstract class SingletonFunction extends IntervalFunction { - - private final String name; - - protected SingletonFunction(String name) { - this.name = name; - } - - @Override - public int hashCode() { - return System.identityHashCode(this); - } - - @Override - public boolean equals(Object obj) { - return obj == this; - } - - @Override - public String toString() { - return name; - } - - } - -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalIterator.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalIterator.java deleted file mode 100644 index f6a5f89efc12..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalIterator.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.intervals; - -import java.io.IOException; - -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.TwoPhaseIterator; - -/** - * A {@link DocIdSetIterator} that also allows iteration over matching - * intervals in a document. - * - * Once the iterator is positioned on a document by calling {@link #advance(int)} - * or {@link #nextDoc()}, intervals may be retrieved by calling {@link #nextInterval()} - * until {@link #NO_MORE_INTERVALS} is returned. - * - * The limits of the current interval are returned by {@link #start()} and {@link #end()}. - * When the iterator has been moved to a new document, but before {@link #nextInterval()} - * has been called, both these methods return {@code -1}. 
- * - * Note that it is possible for a document to return {@link #NO_MORE_INTERVALS} - * on the first call to {@link #nextInterval()} - */ -public abstract class IntervalIterator extends DocIdSetIterator { - - protected final DocIdSetIterator approximation; - - protected IntervalIterator(DocIdSetIterator approximation) { - this.approximation = approximation; - } - - /** - * When returned from {@link #nextInterval()}, indicates that there are no more - * matching intervals on the current document - */ - public static final int NO_MORE_INTERVALS = Integer.MAX_VALUE; - - @Override - public final int docID() { - return approximation.docID(); - } - - @Override - public final int nextDoc() throws IOException { - int doc = approximation.nextDoc(); - reset(); - return doc; - } - - @Override - public final int advance(int target) throws IOException { - int doc = approximation.advance(target); - reset(); - return doc; - } - - @Override - public final long cost() { - return approximation.cost(); - } - - /** - * The start of the current interval - * - * Returns -1 if {@link #nextInterval()} has not yet been called - */ - public abstract int start(); - - /** - * The end of the current interval - * - * Returns -1 if {@link #nextInterval()} has not yet been called - */ - public abstract int end(); - - /** - * Advance the iterator to the next interval - * - * @return the start of the next interval, or {@link IntervalIterator#NO_MORE_INTERVALS} if - * there are no more intervals on the current document - */ - public abstract int nextInterval() throws IOException; - - /** - * An indication of the average cost of iterating over all intervals in a document - * - * @see TwoPhaseIterator#matchCost() - */ - public abstract float matchCost(); - - /** - * Called when the underlying iterator has been advanced. 
- */ - protected abstract void reset() throws IOException; - - @Override - public String toString() { - return approximation.docID() + ":[" + start() + "->" + end() + "]"; - } - -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalQuery.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalQuery.java deleted file mode 100644 index 4d2c22f97a35..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalQuery.java +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.lucene.intervals; - -import java.io.IOException; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Objects; -import java.util.Set; - -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermStates; -import org.apache.lucene.search.CollectionStatistics; -import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.LeafSimScorer; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.TermStatistics; -import org.apache.lucene.search.Weight; -import org.apache.lucene.search.similarities.Similarity; - -/** - * A query that retrieves documents containing intervals returned from an - * {@link IntervalsSource} - * - * Static constructor functions for various different sources can be found in the - * {@link Intervals} class - */ -public final class IntervalQuery extends Query { - - private final String field; - private final IntervalsSource intervalsSource; - - /** - * Create a new IntervalQuery - * @param field the field to query - * @param intervalsSource an {@link IntervalsSource} to retrieve intervals from - */ - public IntervalQuery(String field, IntervalsSource intervalsSource) { - this.field = field; - this.intervalsSource = intervalsSource; - } - - public String getField() { - return field; - } - - @Override - public String toString(String field) { - return intervalsSource.toString(); - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return new IntervalWeight(this, scoreMode.needsScores() ? 
buildSimScorer(searcher, boost) : null, - searcher.getSimilarity(), scoreMode); - } - - private Similarity.SimScorer buildSimScorer(IndexSearcher searcher, float boost) throws IOException { - Set terms = new HashSet<>(); - intervalsSource.extractTerms(field, terms); - TermStatistics[] termStats = new TermStatistics[terms.size()]; - int termUpTo = 0; - for (Term term : terms) { - TermStatistics termStatistics = searcher.termStatistics(term, TermStates.build(searcher.getTopReaderContext(), term, true)); - if (termStatistics != null) { - termStats[termUpTo++] = termStatistics; - } - } - if (termUpTo == 0) { - return null; - } - CollectionStatistics collectionStats = searcher.collectionStatistics(field); - return searcher.getSimilarity().scorer(boost, collectionStats, Arrays.copyOf(termStats, termUpTo)); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - IntervalQuery that = (IntervalQuery) o; - return Objects.equals(field, that.field) && - Objects.equals(intervalsSource, that.intervalsSource); - } - - @Override - public int hashCode() { - return Objects.hash(field, intervalsSource); - } - - private class IntervalWeight extends Weight { - - final Similarity.SimScorer simScorer; - final Similarity similarity; - final ScoreMode scoreMode; - - public IntervalWeight(Query query, Similarity.SimScorer simScorer, Similarity similarity, ScoreMode scoreMode) { - super(query); - this.simScorer = simScorer; - this.similarity = similarity; - this.scoreMode = scoreMode; - } - - @Override - public void extractTerms(Set terms) { - intervalsSource.extractTerms(field, terms); - } - - @Override - public Explanation explain(LeafReaderContext context, int doc) throws IOException { - IntervalScorer scorer = (IntervalScorer) scorer(context); - if (scorer != null) { - int newDoc = scorer.iterator().advance(doc); - if (newDoc == doc) { - return scorer.explain("weight("+getQuery()+" in "+doc+") [" + 
similarity.getClass().getSimpleName() + "]"); - } - } - return Explanation.noMatch("no matching intervals"); - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - IntervalIterator intervals = intervalsSource.intervals(field, context); - if (intervals == null) - return null; - LeafSimScorer leafScorer = simScorer == null ? null - : new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE); - return new IntervalScorer(this, intervals, leafScorer); - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - return true; - } - } - -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalScorer.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalScorer.java deleted file mode 100644 index a28eddcf16fd..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalScorer.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.lucene.intervals; - -import java.io.IOException; - -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.LeafSimScorer; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.TwoPhaseIterator; -import org.apache.lucene.search.Weight; - -class IntervalScorer extends Scorer { - - private final IntervalIterator intervals; - private final LeafSimScorer simScorer; - - private float freq = -1; - private int lastScoredDoc = -1; - - protected IntervalScorer(Weight weight, IntervalIterator intervals, LeafSimScorer simScorer) { - super(weight); - this.intervals = intervals; - this.simScorer = simScorer; - } - - @Override - public int docID() { - return intervals.docID(); - } - - @Override - public float score() throws IOException { - ensureFreq(); - return simScorer.score(docID(), freq); - } - - public Explanation explain(String topLevel) throws IOException { - ensureFreq(); - Explanation freqExplanation = Explanation.match(freq, "intervalFreq=" + freq); - Explanation scoreExplanation = simScorer.explain(docID(), freqExplanation); - return Explanation.match(scoreExplanation.getValue(), - topLevel + ", result of:", - scoreExplanation); - } - - public float freq() throws IOException { - ensureFreq(); - return freq; - } - - private void ensureFreq() throws IOException { - if (lastScoredDoc != docID()) { - lastScoredDoc = docID(); - freq = 0; - do { - freq += (1.0 / (intervals.end() - intervals.start() + 1)); - } - while (intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS); - } - } - - @Override - public DocIdSetIterator iterator() { - return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); - } - - @Override - public TwoPhaseIterator twoPhaseIterator() { - return new TwoPhaseIterator(intervals) { - @Override - public boolean matches() throws IOException { - return intervals.nextInterval() != IntervalIterator.NO_MORE_INTERVALS; - } - - 
@Override - public float matchCost() { - return intervals.cost(); - } - }; - } - - @Override - public float getMaxScore(int upTo) throws IOException { - return Float.POSITIVE_INFINITY; - } - - -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/Intervals.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/Intervals.java deleted file mode 100644 index 7b95e4cf0f7b..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/Intervals.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.lucene.intervals; - -import java.util.Arrays; - -import org.apache.lucene.util.BytesRef; - -/** - * Constructor functions for {@link IntervalsSource} types - * - * These sources implement minimum-interval algorithms taken from the paper - * - * Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics - */ -public final class Intervals { - - private Intervals() {} - - /** - * Return an {@link IntervalsSource} exposing intervals for a term - */ - public static IntervalsSource term(BytesRef term) { - return new TermIntervalsSource(term); - } - - /** - * Return an {@link IntervalsSource} exposing intervals for a term - */ - public static IntervalsSource term(String term) { - return new TermIntervalsSource(new BytesRef(term)); - } - - /** - * Return an {@link IntervalsSource} exposing intervals for a phrase consisting of a list of terms - */ - public static IntervalsSource phrase(String... terms) { - IntervalsSource[] sources = new IntervalsSource[terms.length]; - int i = 0; - for (String term : terms) { - sources[i] = term(term); - i++; - } - return phrase(sources); - } - - /** - * Return an {@link IntervalsSource} exposing intervals for a phrase consisting of a list of IntervalsSources - */ - public static IntervalsSource phrase(IntervalsSource... subSources) { - return new ConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.BLOCK); - } - - /** - * Return an {@link IntervalsSource} over the disjunction of a set of sub-sources - */ - public static IntervalsSource or(IntervalsSource... 
subSources) { - if (subSources.length == 1) - return subSources[0]; - return new DisjunctionIntervalsSource(Arrays.asList(subSources)); - } - - /** - * Create an {@link IntervalsSource} that filters a sub-source by the width of its intervals - * @param width the maximum width of intervals in the sub-source ot return - * @param subSource the sub-source to filter - */ - public static IntervalsSource maxwidth(int width, IntervalsSource subSource) { - return new LowpassIntervalsSource(subSource, width); - } - - /** - * Create an ordered {@link IntervalsSource} with an unbounded width range - * - * Returns intervals in which the subsources all appear in the given order - * - * @param subSources an ordered set of {@link IntervalsSource} objects - */ - public static IntervalsSource ordered(IntervalsSource... subSources) { - return new ConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.ORDERED); - } - - /** - * Create an unordered {@link IntervalsSource} with an unbounded width range - * - * Returns intervals in which all the subsources appear. - * - * @param subSources an unordered set of queries - */ - public static IntervalsSource unordered(IntervalsSource... 
subSources) { - return new ConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.UNORDERED); - } - - /** - * Create a non-overlapping IntervalsSource - * - * Returns intervals of the minuend that do not overlap with intervals from the subtrahend - - * @param minuend the {@link IntervalsSource} to filter - * @param subtrahend the {@link IntervalsSource} to filter by - */ - public static IntervalsSource nonOverlapping(IntervalsSource minuend, IntervalsSource subtrahend) { - return new DifferenceIntervalsSource(minuend, subtrahend, DifferenceIntervalFunction.NON_OVERLAPPING); - } - - /** - * Create a not-within {@link IntervalsSource} - * - * Returns intervals of the minuend that do not appear within a set number of positions of - * intervals from the subtrahend query - * - * @param minuend the {@link IntervalsSource} to filter - * @param positions the maximum distance that intervals from the minuend may occur from intervals - * of the subtrahend - * @param subtrahend the {@link IntervalsSource} to filter by - */ - public static IntervalsSource notWithin(IntervalsSource minuend, int positions, IntervalsSource subtrahend) { - return new DifferenceIntervalsSource(minuend, subtrahend, new DifferenceIntervalFunction.NotWithinFunction(positions)); - } - - /** - * Create a not-containing {@link IntervalsSource} - * - * Returns intervals from the minuend that do not contain intervals of the subtrahend - * - * @param minuend the {@link IntervalsSource} to filter - * @param subtrahend the {@link IntervalsSource} to filter by - */ - public static IntervalsSource notContaining(IntervalsSource minuend, IntervalsSource subtrahend) { - return new DifferenceIntervalsSource(minuend, subtrahend, DifferenceIntervalFunction.NOT_CONTAINING); - } - - /** - * Create a containing {@link IntervalsSource} - * - * Returns intervals from the big source that contain one or more intervals from - * the small source - * - * @param big the {@link IntervalsSource} to filter - * 
@param small the {@link IntervalsSource} to filter by - */ - public static IntervalsSource containing(IntervalsSource big, IntervalsSource small) { - return new ConjunctionIntervalsSource(Arrays.asList(big, small), IntervalFunction.CONTAINING); - } - - /** - * Create a not-contained-by {@link IntervalsSource} - * - * Returns intervals from the small {@link IntervalsSource} that do not appear within - * intervals from the big {@link IntervalsSource}. - * - * @param small the {@link IntervalsSource} to filter - * @param big the {@link IntervalsSource} to filter by - */ - public static IntervalsSource notContainedBy(IntervalsSource small, IntervalsSource big) { - return new DifferenceIntervalsSource(small, big, DifferenceIntervalFunction.NOT_CONTAINED_BY); - } - - /** - * Create a contained-by {@link IntervalsSource} - * - * Returns intervals from the small query that appear within intervals of the big query - * - * @param small the {@link IntervalsSource} to filter - * @param big the {@link IntervalsSource} to filter by - */ - public static IntervalsSource containedBy(IntervalsSource small, IntervalsSource big) { - return new ConjunctionIntervalsSource(Arrays.asList(small, big), IntervalFunction.CONTAINED_BY); - } - - // TODO: beforeQuery, afterQuery, arbitrary IntervalFunctions - -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalsSource.java deleted file mode 100644 index 405423bec334..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/IntervalsSource.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.intervals; - -import java.io.IOException; -import java.util.Set; - -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; - -/** - * A helper class for {@link IntervalQuery} that provides an {@link IntervalIterator} - * for a given field and segment - * - * Static constructor functions for various different sources can be found in the - * {@link Intervals} class - */ -public abstract class IntervalsSource { - - /** - * Create an {@link IntervalIterator} exposing the minimum intervals defined by this {@link IntervalsSource} - * - * Returns {@code null} if no intervals for this field exist in this segment - * - * @param field the field to read positions from - * @param ctx the context for which to return the iterator - */ - public abstract IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException; - - /** - * Expert: collect {@link Term} objects from this source, to be used for top-level term scoring - * @param field the field to be scored - * @param terms a {@link Set} which terms should be added to - */ - public abstract void extractTerms(String field, Set terms); - - @Override - public abstract int hashCode(); - - @Override - public abstract boolean equals(Object other); - - @Override - public abstract String toString(); - -} diff --git 
a/lucene/sandbox/src/java/org/apache/lucene/intervals/LowpassIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/LowpassIntervalsSource.java deleted file mode 100644 index 82f4acf785be..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/LowpassIntervalsSource.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.lucene.intervals; - -import java.io.IOException; -import java.util.Objects; -import java.util.Set; - -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; - -class LowpassIntervalsSource extends IntervalsSource { - - final IntervalsSource in; - final int maxWidth; - - LowpassIntervalsSource(IntervalsSource in, int maxWidth) { - this.in = in; - this.maxWidth = maxWidth; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - LowpassIntervalsSource that = (LowpassIntervalsSource) o; - return maxWidth == that.maxWidth && - Objects.equals(in, that.in); - } - - @Override - public String toString() { - return "MAXWIDTH/" + maxWidth + "(" + in + ")"; - } - - @Override - public void extractTerms(String field, Set terms) { - in.extractTerms(field, terms); - } - - @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { - IntervalIterator i = in.intervals(field, ctx); - return new IntervalFilter(i) { - @Override - protected boolean accept() { - return (i.end() - i.start()) + 1 <= maxWidth; - } - }; - } - - @Override - public int hashCode() { - return Objects.hash(in, maxWidth); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/intervals/TermIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/intervals/TermIntervalsSource.java deleted file mode 100644 index 00ed08984720..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/intervals/TermIntervalsSource.java +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.intervals; - -import java.io.IOException; -import java.util.Objects; -import java.util.Set; - -import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat; -import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.TwoPhaseIterator; -import org.apache.lucene.util.BytesRef; - -class TermIntervalsSource extends IntervalsSource { - - final BytesRef term; - - TermIntervalsSource(BytesRef term) { - this.term = term; - } - - @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { - Terms terms = ctx.reader().terms(field); - if (terms == null) - return null; - if (terms.hasPositions() == false) { - throw new IllegalArgumentException("Cannot create an IntervalIterator over field " + field + " because it has no indexed positions"); - } - TermsEnum te = terms.iterator(); - te.seekExact(term); - PostingsEnum pe = te.postings(null, PostingsEnum.POSITIONS); - float cost = termPositionsCost(te); - return new IntervalIterator(pe) 
{ - - int pos = -1, upto; - - @Override - public int start() { - return pos; - } - - @Override - public int end() { - return pos; - } - - @Override - public int nextInterval() throws IOException { - if (upto <= 0) - return pos = NO_MORE_INTERVALS; - upto--; - return pos = pe.nextPosition(); - } - - @Override - public float matchCost() { - return cost; - } - - @Override - protected void reset() throws IOException { - if (pe.docID() == NO_MORE_DOCS) { - upto = -1; - pos = NO_MORE_INTERVALS; - } - else { - upto = pe.freq(); - pos = -1; - } - } - - @Override - public String toString() { - return term.utf8ToString() + ":" + super.toString(); - } - }; - } - - @Override - public int hashCode() { - return Objects.hash(term); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - TermIntervalsSource that = (TermIntervalsSource) o; - return Objects.equals(term, that.term); - } - - @Override - public String toString() { - return term.utf8ToString(); - } - - @Override - public void extractTerms(String field, Set terms) { - terms.add(new Term(field, term)); - } - - /** A guess of - * the average number of simple operations for the initial seek and buffer refill - * per document for the positions of a term. - * See also {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}. - *

    - * Aside: Instead of being constant this could depend among others on - * {@link Lucene50PostingsFormat#BLOCK_SIZE}, - * {@link TermsEnum#docFreq()}, - * {@link TermsEnum#totalTermFreq()}, - * {@link DocIdSetIterator#cost()} (expected number of matching docs), - * {@link LeafReader#maxDoc()} (total number of docs in the segment), - * and the seek time and block size of the device storing the index. - */ - private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128; - - /** Number of simple operations in {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()} - * when no seek or buffer refill is done. - */ - private static final int TERM_OPS_PER_POS = 7; - - /** Returns an expected cost in simple operations - * of processing the occurrences of a term - * in a document that contains the term. - * This is for use by {@link TwoPhaseIterator#matchCost} implementations. - * @param termsEnum The term is the term at which this TermsEnum is positioned. - */ - static float termPositionsCost(TermsEnum termsEnum) throws IOException { - // TODO: When intervals move to core, refactor to use the copy of this in PhraseQuery - int docFreq = termsEnum.docFreq(); - assert docFreq > 0; - long totalTermFreq = termsEnum.totalTermFreq(); - float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq; - return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS; - } -} From 873f58e347960ce0a320a85f3fe231aa100e6d5a Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 19 Mar 2018 09:19:31 +0000 Subject: [PATCH 81/83] Clean up copied ConjunctionDISI - no need for TPI anywhere --- .../search/intervals/ConjunctionDISI.java | 184 +--------------- .../lucene/intervals/TestIntervalQuery.java | 159 -------------- .../lucene/intervals/TestIntervals.java | 197 ------------------ 3 files changed, 6 insertions(+), 534 deletions(-) delete mode 100644 lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervalQuery.java delete mode 100644 
lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervals.java diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionDISI.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionDISI.java index cb5963d75566..a425757f65d8 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionDISI.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionDISI.java @@ -34,92 +34,36 @@ /** A conjunction of DocIdSetIterators. * This iterates over the doc ids that are present in each given DocIdSetIterator. - *
    Public only for use in {@link org.apache.lucene.search.spans}. * @lucene.internal */ final class ConjunctionDISI extends DocIdSetIterator { - /** Create a conjunction over the provided DocIdSetIterators. Note that the - * returned {@link DocIdSetIterator} might leverage two-phase iteration in - * which case it is possible to retrieve the {@link TwoPhaseIterator} using - * {@link TwoPhaseIterator#unwrap}. */ + /** Create a conjunction over the provided DocIdSetIterators. */ public static DocIdSetIterator intersectIterators(List iterators) { if (iterators.size() < 2) { throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators"); } final List allIterators = new ArrayList<>(); - final List twoPhaseIterators = new ArrayList<>(); for (DocIdSetIterator iterator : iterators) { - addIterator(iterator, allIterators, twoPhaseIterators); + addIterator(iterator, allIterators); } - return createConjunction(allIterators, twoPhaseIterators); + return new ConjunctionDISI(allIterators); } - private static void addIterator(DocIdSetIterator disi, List allIterators, List twoPhaseIterators) { - TwoPhaseIterator twoPhase = TwoPhaseIterator.unwrap(disi); - if (twoPhase != null) { - addTwoPhaseIterator(twoPhase, allIterators, twoPhaseIterators); - } else if (disi.getClass() == ConjunctionDISI.class) { // Check for exactly this class for collapsing + private static void addIterator(DocIdSetIterator disi, List allIterators) { + if (disi.getClass() == ConjunctionDISI.class) { // Check for exactly this class for collapsing ConjunctionDISI conjunction = (ConjunctionDISI) disi; // subconjuctions have already split themselves into two phase iterators and others, so we can take those // iterators as they are and move them up to this conjunction allIterators.add(conjunction.lead1); allIterators.add(conjunction.lead2); Collections.addAll(allIterators, conjunction.others); - } else if (disi.getClass() == BitSetConjunctionDISI.class) { - BitSetConjunctionDISI 
conjunction = (BitSetConjunctionDISI) disi; - allIterators.add(conjunction.lead); - Collections.addAll(allIterators, conjunction.bitSetIterators); } else { allIterators.add(disi); } } - private static void addTwoPhaseIterator(TwoPhaseIterator twoPhaseIter, List allIterators, List twoPhaseIterators) { - addIterator(twoPhaseIter.approximation(), allIterators, twoPhaseIterators); - if (twoPhaseIter.getClass() == ConjunctionTwoPhaseIterator.class) { // Check for exactly this class for collapsing - Collections.addAll(twoPhaseIterators, ((ConjunctionTwoPhaseIterator) twoPhaseIter).twoPhaseIterators); - } else { - twoPhaseIterators.add(twoPhaseIter); - } - } - - private static DocIdSetIterator createConjunction( - List allIterators, - List twoPhaseIterators) { - long minCost = allIterators.stream().mapToLong(DocIdSetIterator::cost).min().getAsLong(); - List bitSetIterators = new ArrayList<>(); - List iterators = new ArrayList<>(); - for (DocIdSetIterator iterator : allIterators) { - if (iterator.cost() > minCost && iterator instanceof BitSetIterator) { - // we put all bitset iterators into bitSetIterators - // except if they have the minimum cost, since we need - // them to lead the iteration in that case - bitSetIterators.add((BitSetIterator) iterator); - } else { - iterators.add(iterator); - } - } - - DocIdSetIterator disi; - if (iterators.size() == 1) { - disi = iterators.get(0); - } else { - disi = new ConjunctionDISI(iterators); - } - - if (bitSetIterators.size() > 0) { - disi = new BitSetConjunctionDISI(disi, bitSetIterators); - } - - if (twoPhaseIterators.isEmpty() == false) { - disi = TwoPhaseIterator.asDocIdSetIterator(new ConjunctionTwoPhaseIterator(disi, twoPhaseIterators)); - } - - return disi; - } - final DocIdSetIterator lead1, lead2; final DocIdSetIterator[] others; @@ -127,12 +71,7 @@ private ConjunctionDISI(List iterators) { assert iterators.size() >= 2; // Sort the array the first time to allow the least frequent DocsEnum to // lead the matching. 
- CollectionUtil.timSort(iterators, new Comparator() { - @Override - public int compare(DocIdSetIterator o1, DocIdSetIterator o2) { - return Long.compare(o1.cost(), o2.cost()); - } - }); + CollectionUtil.timSort(iterators, Comparator.comparingLong(DocIdSetIterator::cost)); lead1 = iterators.get(0); lead2 = iterators.get(1); others = iterators.subList(2, iterators.size()).toArray(new DocIdSetIterator[0]); @@ -193,115 +132,4 @@ public long cost() { return lead1.cost(); // overestimate } - /** Conjunction between a {@link DocIdSetIterator} and one or more {@link BitSetIterator}s. */ - private static class BitSetConjunctionDISI extends DocIdSetIterator { - - private final DocIdSetIterator lead; - private final BitSetIterator[] bitSetIterators; - private final BitSet[] bitSets; - private final int minLength; - - BitSetConjunctionDISI(DocIdSetIterator lead, Collection bitSetIterators) { - this.lead = lead; - assert bitSetIterators.size() > 0; - this.bitSetIterators = bitSetIterators.toArray(new BitSetIterator[0]); - // Put the least costly iterators first so that we exit as soon as possible - ArrayUtil.timSort(this.bitSetIterators, (a, b) -> Long.compare(a.cost(), b.cost())); - this.bitSets = new BitSet[this.bitSetIterators.length]; - int minLen = Integer.MAX_VALUE; - for (int i = 0; i < this.bitSetIterators.length; ++i) { - BitSet bitSet = this.bitSetIterators[i].getBitSet(); - this.bitSets[i] = bitSet; - minLen = Math.min(minLen, bitSet.length()); - } - this.minLength = minLen; - } - - @Override - public int docID() { - return lead.docID(); - } - - @Override - public int nextDoc() throws IOException { - return doNext(lead.nextDoc()); - } - - @Override - public int advance(int target) throws IOException { - return doNext(lead.advance(target)); - } - - private int doNext(int doc) throws IOException { - advanceLead: for (;; doc = lead.nextDoc()) { - if (doc >= minLength) { - return NO_MORE_DOCS; - } - for (BitSet bitSet : bitSets) { - if (bitSet.get(doc) == false) { - 
continue advanceLead; - } - } - for (BitSetIterator iterator : bitSetIterators) { - iterator.setDocId(doc); - } - return doc; - } - } - - @Override - public long cost() { - return lead.cost(); - } - - } - - /** - * {@link TwoPhaseIterator} implementing a conjunction. - */ - private static final class ConjunctionTwoPhaseIterator extends TwoPhaseIterator { - - private final TwoPhaseIterator[] twoPhaseIterators; - private final float matchCost; - - private ConjunctionTwoPhaseIterator(DocIdSetIterator approximation, - List twoPhaseIterators) { - super(approximation); - assert twoPhaseIterators.size() > 0; - - CollectionUtil.timSort(twoPhaseIterators, new Comparator() { - @Override - public int compare(TwoPhaseIterator o1, TwoPhaseIterator o2) { - return Float.compare(o1.matchCost(), o2.matchCost()); - } - }); - - this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseIterator[twoPhaseIterators.size()]); - - // Compute the matchCost as the total matchCost of the sub iterators. - // TODO: This could be too high because the matching is done cheapest first: give the lower matchCosts a higher weight. 
- float totalMatchCost = 0; - for (TwoPhaseIterator tpi : twoPhaseIterators) { - totalMatchCost += tpi.matchCost(); - } - matchCost = totalMatchCost; - } - - @Override - public boolean matches() throws IOException { - for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) { // match cheapest first - if (twoPhaseIterator.matches() == false) { - return false; - } - } - return true; - } - - @Override - public float matchCost() { - return matchCost; - } - - } - } diff --git a/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervalQuery.java b/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervalQuery.java deleted file mode 100644 index 489603f67fc0..000000000000 --- a/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervalQuery.java +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.lucene.intervals; - -import java.io.IOException; - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.search.CheckHits; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.LuceneTestCase; - -public class TestIntervalQuery extends LuceneTestCase { - - private IndexSearcher searcher; - private IndexReader reader; - private Directory directory; - - public static final String field = "field"; - - @Override - public void setUp() throws Exception { - super.setUp(); - directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); - for (int i = 0; i < docFields.length; i++) { - Document doc = new Document(); - doc.add(newTextField(field, docFields[i], Field.Store.YES)); - writer.addDocument(doc); - } - reader = writer.getReader(); - writer.close(); - searcher = newSearcher(reader); - } - - @Override - public void tearDown() throws Exception { - reader.close(); - directory.close(); - super.tearDown(); - } - - private String[] docFields = { - "w1 w2 w3 w4 w5", - "w1 w3 w2 w3", - "w1 xx w2 w4 yy w3", - "w1 w3 xx w2 yy w3", - "w2 w1", - "w2 w1 w3 w2 w4", - "coordinate genome mapping research", - "coordinate genome research" - }; - - private void checkHits(Query query, int[] results) throws IOException { - CheckHits.checkHits(random(), query, field, searcher, results); - } - - public void testPhraseQuery() throws IOException { - checkHits(new IntervalQuery(field, Intervals.phrase(Intervals.term("w1"), Intervals.term("w2"))), - new int[]{0}); - } - - public void testOrderedNearQueryWidth3() throws IOException { - 
checkHits(new IntervalQuery(field, Intervals.maxwidth(3, Intervals.ordered(Intervals.term("w1"), Intervals.term("w2")))), - new int[]{0, 1, 2, 5}); - } - - public void testOrderedNearQueryWidth4() throws IOException { - checkHits(new IntervalQuery(field, Intervals.maxwidth(4, Intervals.ordered(Intervals.term("w1"), Intervals.term("w2")))), - new int[]{0, 1, 2, 3, 5}); - } - - public void testNestedOrderedNearQuery() throws IOException { - // onear/1(w1, onear/2(w2, w3)) - Query q = new IntervalQuery(field, - Intervals.ordered( - Intervals.term("w1"), - Intervals.maxwidth(3, Intervals.ordered(Intervals.term("w2"), Intervals.term("w3"))))); - - checkHits(q, new int[]{0, 1, 3}); - } - - public void testUnorderedQuery() throws IOException { - Query q = new IntervalQuery(field, Intervals.unordered(Intervals.term("w1"), Intervals.term("w3"))); - checkHits(q, new int[]{0, 1, 2, 3, 5}); - } - - public void testNonOverlappingQuery() throws IOException { - Query q = new IntervalQuery(field, Intervals.nonOverlapping( - Intervals.unordered(Intervals.term("w1"), Intervals.term("w3")), - Intervals.unordered(Intervals.term("w2"), Intervals.term("w4")))); - checkHits(q, new int[]{1, 3, 5}); - } - - public void testNotWithinQuery() throws IOException { - Query q = new IntervalQuery(field, Intervals.notWithin(Intervals.term("w1"), 1, Intervals.term("w2"))); - checkHits(q, new int[]{ 1, 2, 3 }); - } - - public void testNotContainingQuery() throws IOException { - Query q = new IntervalQuery(field, Intervals.notContaining( - Intervals.unordered(Intervals.term("w1"), Intervals.term("w2")), - Intervals.term("w3") - )); - checkHits(q, new int[]{ 0, 2, 4, 5 }); - } - - public void testContainingQuery() throws IOException { - Query q = new IntervalQuery(field, Intervals.containing( - Intervals.unordered(Intervals.term("w1"), Intervals.term("w2")), - Intervals.term("w3") - )); - checkHits(q, new int[]{ 1, 3, 5 }); - } - - public void testContainedByQuery() throws IOException { - Query q = 
new IntervalQuery(field, Intervals.containedBy( - Intervals.term("w3"), - Intervals.unordered(Intervals.term("w1"), Intervals.term("w2")))); - checkHits(q, new int[]{ 1, 3, 5 }); - } - - public void testNotContainedByQuery() throws IOException { - Query q = new IntervalQuery(field, Intervals.notContainedBy( - Intervals.term("w2"), - Intervals.unordered(Intervals.term("w1"), Intervals.term("w4")) - )); - checkHits(q, new int[]{ 1, 3, 4, 5 }); - } - - public void testNestedOr() throws IOException { - Query q = new IntervalQuery(field, Intervals.phrase( - Intervals.term("coordinate"), - Intervals.or(Intervals.phrase("genome", "mapping"), Intervals.term("genome")), - Intervals.term("research"))); - checkHits(q, new int[]{ 6, 7 }); - } - -} diff --git a/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervals.java b/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervals.java deleted file mode 100644 index 182299ae9443..000000000000 --- a/lucene/sandbox/src/test/org/apache/lucene/intervals/TestIntervals.java +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.lucene.intervals; - -import java.io.IOException; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.LuceneTestCase; -import org.junit.AfterClass; -import org.junit.BeforeClass; - -public class TestIntervals extends LuceneTestCase { - - private static String field1_docs[] = { - "Nothing of interest to anyone here", - "Pease porridge hot, pease porridge cold, pease porridge in the pot nine days old. Some like it hot, some like it cold, some like it in the pot nine days old", - "Pease porridge cold, pease porridge hot, pease porridge in the pot twelve days old. Some like it cold, some like it hot, some like it in the fraggle", - "Nor here, nowt hot going on in pease this one", - "Pease porridge hot, pease porridge cold, pease porridge in the pot nine years old. 
Some like it hot, some like it twelve", - "Porridge is great" - }; - - private static String field2_docs[] = { - "In Xanadu did Kubla Khan a stately pleasure dome decree", - "Where Alph the sacred river ran through caverns measureless to man", - "Down to a sunless sea", - "So thrice five miles of fertile ground", - "With walls and towers were girdled round", - "Which was nice" - }; - - private static Directory directory; - private static IndexSearcher searcher; - private static Analyzer analyzer = new StandardAnalyzer(); - - @BeforeClass - public static void setupIndex() throws IOException { - directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(analyzer)); - for (int i = 0; i < field1_docs.length; i++) { - Document doc = new Document(); - doc.add(new TextField("field1", field1_docs[i], Field.Store.NO)); - doc.add(new TextField("field2", field2_docs[i], Field.Store.NO)); - doc.add(new StringField("id", Integer.toString(i), Field.Store.NO)); - doc.add(new NumericDocValuesField("id", i)); - writer.addDocument(doc); - } - writer.close(); - searcher = new IndexSearcher(DirectoryReader.open(directory)); - } - - @AfterClass - public static void teardownIndex() throws IOException { - IOUtils.close(searcher.getIndexReader(), directory); - } - - private void checkIntervals(IntervalsSource source, String field, int expectedMatchCount, int[][] expected) throws IOException { - int matchedDocs = 0; - for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) { - assertNull(source.intervals(field + "fake", ctx)); - NumericDocValues ids = DocValues.getNumeric(ctx.reader(), "id"); - IntervalIterator intervals = source.intervals(field, ctx); - if (intervals == null) - continue; - for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { - ids.advance(doc); - int id = (int) ids.longValue(); - if (intervals.docID() == doc || - (intervals.docID() < doc && intervals.advance(doc) == doc)) { - int i = 0, pos; - 
assertEquals(-1, intervals.start()); - assertEquals(-1, intervals.end()); - while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) { - //System.out.println(doc + ": " + intervals); - assertEquals(expected[id][i], pos); - assertEquals(expected[id][i], intervals.start()); - assertEquals(expected[id][i + 1], intervals.end()); - i += 2; - } - assertEquals(expected[id].length, i); - if (i > 0) - matchedDocs++; - } - else { - assertEquals(0, expected[id].length); - } - } - } - assertEquals(expectedMatchCount, matchedDocs); - } - - public void testIntervalsOnFieldWithNoPositions() throws IOException { - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> { - Intervals.term("wibble").intervals("id", searcher.getIndexReader().leaves().get(0)); - }); - assertEquals("Cannot create an IntervalIterator over field id because it has no indexed positions", e.getMessage()); - } - - public void testTermQueryIntervals() throws IOException { - checkIntervals(Intervals.term("porridge"), "field1", 4, new int[][]{ - {}, - { 1, 1, 4, 4, 7, 7 }, - { 1, 1, 4, 4, 7, 7 }, - {}, - { 1, 1, 4, 4, 7, 7 }, - { 0, 0 } - }); - } - - public void testOrderedNearIntervals() throws IOException { - checkIntervals(Intervals.ordered(Intervals.term("pease"), Intervals.term("hot")), - "field1", 3, new int[][]{ - {}, - { 0, 2, 6, 17 }, - { 3, 5, 6, 21 }, - {}, - { 0, 2, 6, 17 }, - { } - }); - } - - public void testPhraseIntervals() throws IOException { - checkIntervals(Intervals.phrase("pease", "porridge"), "field1", 3, new int[][]{ - {}, - { 0, 1, 3, 4, 6, 7 }, - { 0, 1, 3, 4, 6, 7 }, - {}, - { 0, 1, 3, 4, 6, 7 }, - {} - }); - } - - public void testUnorderedNearIntervals() throws IOException { - checkIntervals(Intervals.unordered(Intervals.term("pease"), Intervals.term("hot")), - "field1", 4, new int[][]{ - {}, - { 0, 2, 2, 3, 6, 17 }, - { 3, 5, 5, 6, 6, 21 }, - { 3, 7 }, - { 0, 2, 2, 3, 6, 17 }, - {} - }); - } - - public void testIntervalDisjunction() 
throws IOException { - checkIntervals(Intervals.or(Intervals.term("pease"), Intervals.term("hot")), "field1", 4, new int[][]{ - {}, - { 0, 0, 2, 2, 3, 3, 6, 6, 17, 17}, - { 0, 0, 3, 3, 5, 5, 6, 6, 21, 21}, - { 3, 3, 7, 7 }, - { 0, 0, 2, 2, 3, 3, 6, 6, 17, 17}, - {} - }); - } - - public void testNesting() throws IOException { - checkIntervals(Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.or(Intervals.term("hot"), Intervals.term("cold"))), - "field1", 3, new int[][]{ - {}, - { 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 17 }, - { 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 17 }, - {}, - { 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 17 }, - {} - }); - } - -} From 83486fb814f28574e0f1d550893edaec30d2eccb Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 19 Mar 2018 09:41:36 +0000 Subject: [PATCH 82/83] Cleanup DisiWrapper; IntervalScorer was using the wrong cost function --- .../java/org/apache/lucene/search/intervals/DisiWrapper.java | 4 ---- .../org/apache/lucene/search/intervals/IntervalScorer.java | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiWrapper.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiWrapper.java index 0dc61d481fb8..380f6c91f25e 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiWrapper.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisiWrapper.java @@ -32,16 +32,12 @@ class DisiWrapper { // An approximation of the iterator, or the iterator itself if it does not // support two-phase iteration public final DocIdSetIterator approximation; - // A two-phase view of the iterator, or null if the iterator does not support - // two-phase iteration - public final TwoPhaseIterator twoPhaseView; public DisiWrapper(IntervalIterator iterator) { this.intervals = iterator; this.iterator = iterator; this.cost = iterator.cost(); this.doc = -1; - this.twoPhaseView = null; 
this.approximation = iterator; this.matchCost = iterator.matchCost(); } diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalScorer.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalScorer.java index 7ff04c8c46dc..6672905df966 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalScorer.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalScorer.java @@ -91,7 +91,7 @@ public boolean matches() throws IOException { @Override public float matchCost() { - return intervals.cost(); + return intervals.matchCost(); } }; } From 2e01406b89d1955d0bff03e9c96a226e1ad09ef5 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 19 Mar 2018 12:34:34 +0000 Subject: [PATCH 83/83] Add a 'minimize' boolean to IntervalsSource.intervals() When a disjunction interval contains intervals with the same starting point but different lengths, there are two ways in which we may want to sort things: * shortest first, to find the minimum interval * longest first, to find the best match In general, we want shortest first if the disjunction is not at the start or internal to a position-constrained interval such as a phrase query. So, for example, the disjunction ("a b" or "a") should return "a" if it is standalone, but if it is part of the phrase ("x", ("a b" or "a"), "y"), then it should return "a b". The 'minimize' boolean is generally passed down as 'true', but is marked 'false' when the IntervalsSource is not the final entry in a phrase query. The only IntervalIterator to make use of this boolean is the Disjunction. 
--- .../intervals/ConjunctionIntervalsSource.java | 10 +++-- .../intervals/DifferenceIntervalsSource.java | 6 +-- .../intervals/DisjunctionIntervalsSource.java | 37 ++++++++++++------- .../search/intervals/IntervalFunction.java | 10 +++++ .../search/intervals/IntervalQuery.java | 2 +- .../search/intervals/IntervalsSource.java | 19 ++++++++-- .../intervals/LowpassIntervalsSource.java | 4 +- .../search/intervals/TermIntervalsSource.java | 2 +- .../search/intervals/TestIntervals.java | 25 +++++++++++++ 9 files changed, 89 insertions(+), 26 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalsSource.java index d2805c978901..feda621549b6 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalsSource.java @@ -59,14 +59,18 @@ public void extractTerms(String field, Set terms) { } @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + public IntervalIterator intervals(String field, LeafReaderContext ctx, boolean minimize) throws IOException { List subIntervals = new ArrayList<>(); - for (IntervalsSource source : subSources) { - IntervalIterator it = source.intervals(field, ctx); + for (int i = 0; i < subSources.size() - 1; i++) { + IntervalIterator it = subSources.get(i).intervals(field, ctx, function.minimizeInternalIntervals()); if (it == null) return null; subIntervals.add(it); } + IntervalIterator it = subSources.get(subSources.size() - 1).intervals(field, ctx, true); + if (it == null) + return null; + subIntervals.add(it); return function.apply(subIntervals); } diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalsSource.java 
b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalsSource.java index 316b6ff14c4b..3331c84dc4e0 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalsSource.java @@ -37,11 +37,11 @@ public DifferenceIntervalsSource(IntervalsSource minuend, IntervalsSource subtra } @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { - IntervalIterator minIt = minuend.intervals(field, ctx); + public IntervalIterator intervals(String field, LeafReaderContext ctx, boolean minimize) throws IOException { + IntervalIterator minIt = minuend.intervals(field, ctx, minimize); if (minIt == null) return null; - IntervalIterator subIt = subtrahend.intervals(field, ctx); + IntervalIterator subIt = subtrahend.intervals(field, ctx, false); if (subIt == null) return minIt; return function.apply(minIt, subIt); diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java index 31b7e7d3404a..a5bfe5a9cca5 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java @@ -38,17 +38,17 @@ public DisjunctionIntervalsSource(List subSources) { } @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + public IntervalIterator intervals(String field, LeafReaderContext ctx, boolean minimize) throws IOException { List subIterators = new ArrayList<>(); for (IntervalsSource subSource : subSources) { - IntervalIterator it = subSource.intervals(field, ctx); + IntervalIterator it = subSource.intervals(field, ctx, minimize); if (it != null) { subIterators.add(it); } } if (subIterators.size() 
== 0) return null; - return new DisjunctionIntervalIterator(subIterators); + return new DisjunctionIntervalIterator(subIterators, minimize); } @Override @@ -76,6 +76,25 @@ public void extractTerms(String field, Set terms) { } } + private static PriorityQueue buildQueue(int size, boolean minimize) { + if (minimize) { + return new PriorityQueue(size) { + @Override + protected boolean lessThan(IntervalIterator a, IntervalIterator b) { + return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); + } + }; + } + else { + return new PriorityQueue(size) { + @Override + protected boolean lessThan(IntervalIterator a, IntervalIterator b) { + return a.start() < b.start() || (a.start() == b.start() && a.end() > b.end()); + } + }; + } + } + private static class DisjunctionIntervalIterator extends IntervalIterator { final DocIdSetIterator approximation; @@ -86,22 +105,14 @@ private static class DisjunctionIntervalIterator extends IntervalIterator { IntervalIterator current = EMPTY; - DisjunctionIntervalIterator(List iterators) { + DisjunctionIntervalIterator(List iterators, boolean trailing) { this.disiQueue = new DisiPriorityQueue(iterators.size()); for (IntervalIterator it : iterators) { disiQueue.add(new DisiWrapper(it)); } this.approximation = new DisjunctionDISIApproximation(disiQueue); this.iterators = iterators; - this.intervalQueue = new PriorityQueue(iterators.size()) { - @Override - protected boolean lessThan(IntervalIterator a, IntervalIterator b) { - // This is different to the Vigna paper, because we're interested in matching rather - // than in minimizing intervals, so a wider interval should sort before its prefixes - return a.start() < b.start() || (a.start() == b.start() && a.end() > b.end()); - //return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start()); - } - }; + this.intervalQueue = buildQueue(iterators.size(), trailing); float costsum = 0; for (IntervalIterator it : iterators) { costsum += it.cost(); diff --git 
a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java index c550a722517b..65c8917dc4c1 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java @@ -36,16 +36,26 @@ abstract class IntervalFunction { @Override public abstract String toString(); + boolean minimizeInternalIntervals() { + return true; + } + /** * Combine the iterators into another iterator */ public abstract IntervalIterator apply(List iterators); static final IntervalFunction BLOCK = new SingletonFunction("BLOCK") { + @Override public IntervalIterator apply(List iterators) { return new BlockIntervalIterator(iterators); } + + @Override + boolean minimizeInternalIntervals() { + return false; + } }; private static class BlockIntervalIterator extends ConjunctionIntervalIterator { diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalQuery.java index 934d553717b1..84d71f8f7448 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalQuery.java @@ -138,7 +138,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio @Override public Scorer scorer(LeafReaderContext context) throws IOException { - IntervalIterator intervals = intervalsSource.intervals(field, context); + IntervalIterator intervals = intervalsSource.intervals(field, context, true); if (intervals == null) return null; LeafSimScorer leafScorer = simScorer == null ? 
null diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalsSource.java index 9791ff87b6f7..098cbd7a108a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalsSource.java @@ -32,15 +32,28 @@ */ public abstract class IntervalsSource { + /** + * Create an {@link IntervalIterator} exposing intervals defined by this {@link IntervalsSource} + * + * Returns {@code null} if no intervals for this field exist in this segment + * + * @param field the field to read positions from + * @param ctx the context for which to return the iterator + * @param minimize {@code true} to prefer minimal (narrowest) intervals; {@code false} to let wider intervals sort ahead of their prefixes so that an enclosing source can match on them + */ + protected abstract IntervalIterator intervals(String field, LeafReaderContext ctx, boolean minimize) throws IOException; + /** * Create an {@link IntervalIterator} exposing the minimum intervals defined by this {@link IntervalsSource} * * Returns {@code null} if no intervals for this field exist in this segment * - * @param field the field to read positions from - * @param ctx the context for which to return the iterator + * @param field the field to read positions from + * @param ctx the context for which to return the iterator */ - public abstract IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException; + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + return this.intervals(field, ctx, true); + } /** * Expert: collect {@link Term} objects from this source, to be used for top-level term scoring diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/LowpassIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/LowpassIntervalsSource.java index 3bb469ebd79f..c259dd3bbe8a 100644 ---
a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/LowpassIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/LowpassIntervalsSource.java @@ -54,8 +54,8 @@ public void extractTerms(String field, Set terms) { } @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { - IntervalIterator i = in.intervals(field, ctx); + public IntervalIterator intervals(String field, LeafReaderContext ctx, boolean minimize) throws IOException { + IntervalIterator i = in.intervals(field, ctx, minimize); return new IntervalFilter(i) { @Override protected boolean accept() { diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java index 84e558bb09ec..eee0727d915f 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java @@ -43,7 +43,7 @@ class TermIntervalsSource extends IntervalsSource { } @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + public IntervalIterator intervals(String field, LeafReaderContext ctx, boolean minimize) throws IOException { Terms terms = ctx.reader().terms(field); if (terms == null) return null; diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java index 8f91a7f9788b..ef0a3b177198 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java +++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java @@ -194,4 +194,29 @@ public void testNesting() throws IOException { }); } + + public void testDisjunctionMinimization() throws IOException { + checkIntervals(Intervals.or(Intervals.phrase("pease", 
"porridge"), Intervals.term("pease")), "field1", 4, new int[][]{ + {}, + { 0, 0, 3, 3, 6, 6 }, + { 0, 0, 3, 3, 6, 6 }, + { 7, 7 }, + { 0, 0, 3, 3, 6, 6 }, + {} + }); + } + + public void testNestedDisjunctionMaximization() throws IOException { + // phrase(or(phrase("pease porridge"), "pease"), "hot") + IntervalsSource source = Intervals.phrase(Intervals.or(Intervals.term("pease"), Intervals.phrase("pease", "porridge")), Intervals.term("hot")); + checkIntervals(source, "field1", 3, new int[][]{ + {}, + { 0, 2 }, + { 3, 5 }, + {}, + { 0, 2 }, + {} + }); + } + }