Skip to content

Commit

Permalink
LUCENE-8286 UH: Update some tests to work with Weight Matcher mode
Browse files Browse the repository at this point in the history
  • Loading branch information
dsmiley committed Jul 11, 2018
1 parent a94e187 commit 0e2a579
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,25 @@ static UnifiedHighlighter randomUnifiedHighlighter(IndexSearcher searcher, Analy
return new UnifiedHighlighter(searcher, indexAnalyzer);
} else {
final UnifiedHighlighter uh = new UnifiedHighlighter(searcher, indexAnalyzer) {
Set<HighlightFlag> flags; // consistently random set of flags for this test run
@Override
protected Set<HighlightFlag> getFlags(String field) {
if (flags != null) {
return flags;
}
final EnumSet<HighlightFlag> result = EnumSet.copyOf(mandatoryFlags);
int r = random().nextInt();
for (HighlightFlag highlightFlag : HighlightFlag.values()) {
if (((1 << highlightFlag.ordinal()) & r) == 0) {
result.add(highlightFlag);
}
}
return result;
if (result.contains(HighlightFlag.WEIGHT_MATCHES)) {
// these two are required for WEIGHT_MATCHES
result.add(HighlightFlag.MULTI_TERM_QUERY);
result.add(HighlightFlag.PHRASES);
}
return flags = result;
}
};
uh.setCacheFieldValCharsThreshold(random().nextInt(100));
Expand Down Expand Up @@ -420,7 +429,11 @@ public void testBuddhism() throws Exception {
highlighter.setHighlightPhrasesStrictly(false);
String snippets[] = highlighter.highlight("body", query, topDocs, 2);
assertEquals(1, snippets.length);
assertTrue(snippets[0].contains("<b>Buddhist</b> <b>origins</b>"));
if (highlighter.getFlags("body").containsAll(EnumSet.of(HighlightFlag.WEIGHT_MATCHES, HighlightFlag.PHRASES))) {
assertTrue(snippets[0], snippets[0].contains("<b>Buddhist origins</b>"));
} else {
assertTrue(snippets[0], snippets[0].contains("<b>Buddhist</b> <b>origins</b>"));
}
ir.close();
}

Expand Down Expand Up @@ -1187,6 +1200,19 @@ protected Predicate<String> getFieldMatcher(String field) {
ir.close();
}

/**
 * Highlights a sloppy (slop=2) four-term {@link PhraseQuery} against the "title" field with a
 * default {@link UnifiedHighlighter} and verifies that the whole matched phrase is wrapped in a
 * single highlight span.
 */
public void testMatchesSlopBug() throws IOException {
  final IndexReader reader = indexSomeFields();
  final IndexSearcher indexSearcher = newSearcher(reader);
  final UnifiedHighlighter uh = new UnifiedHighlighter(indexSearcher, indexAnalyzer);

  // The phrase terms omit the word "title", so the expected match below depends on the slop.
  final Query sloppyPhrase = new PhraseQuery(2, "title", "this", "is", "the", "field");
  final TopDocs hits = indexSearcher.search(sloppyPhrase, 10, Sort.INDEXORDER);
  assertEquals(1, hits.totalHits);

  final String[] highlighted = uh.highlight("title", sloppyPhrase, hits, 10);
  assertEquals(1, highlighted.length);
  assertEquals("<b>This is the title field</b>.", highlighted[0]);

  reader.close();
}

public void testFieldMatcherPhraseQuery() throws Exception {
IndexReader ir = indexSomeFields();
IndexSearcher searcher = newSearcher(ir);
Expand All @@ -1197,7 +1223,7 @@ protected Predicate<String> getFieldMatcher(String field) {
return (qf) -> true;
}
};
UnifiedHighlighter highlighterFieldMatch = randomUnifiedHighlighter(searcher, indexAnalyzer, EnumSet.of(HighlightFlag.PHRASES));
UnifiedHighlighter highlighterFieldMatch = randomUnifiedHighlighter(searcher, indexAnalyzer, EnumSet.of(HighlightFlag.PHRASES, HighlightFlag.MULTI_TERM_QUERY));
highlighterFieldMatch.setFieldMatcher(null);//default
BooleanQuery.Builder queryBuilder =
new BooleanQuery.Builder()
Expand All @@ -1214,16 +1240,28 @@ protected Predicate<String> getFieldMatcher(String field) {
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> <b>field</b>.", snippets[0]);
if (highlighterNoFieldMatch.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
assertEquals("<b>This is the title field</b>.", snippets[0]);
} else {
assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> <b>field</b>.", snippets[0]);
}

snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> field.", snippets[0]);
if (highlighterFieldMatch.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
assertEquals("<b>This is the title</b> field.", snippets[0]);
} else {
assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> field.", snippets[0]);
}

highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
if (highlighterFieldMatch.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
assertEquals("<b>This is</b> the title field.", snippets[0]);
} else {
assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
}
highlighterFieldMatch.setFieldMatcher(null);
}

Expand All @@ -1233,11 +1271,20 @@ protected Predicate<String> getFieldMatcher(String field) {
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>text</b> <b>field</b>. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
if (highlighterNoFieldMatch.getFlags("text").contains(HighlightFlag.WEIGHT_MATCHES)) {
assertEquals("<b>This is the text field</b>. <b>You can put some text</b> if you want.", snippets[0]);
} else {
assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>text</b> <b>field</b>. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
}

snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the <b>text</b> field. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
if (highlighterFieldMatch.getFlags("text").contains(HighlightFlag.WEIGHT_MATCHES)) {
assertEquals("<b>This is</b> the text field. <b>You can put some text</b> if you want.", snippets[0]);
} else {
//nocommit why does this highlight the first "text"?
assertEquals("<b>This</b> <b>is</b> the <b>text</b> field. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
}

highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
Expand All @@ -1252,17 +1299,28 @@ protected Predicate<String> getFieldMatcher(String field) {
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
if (highlighterNoFieldMatch.getFlags("category").contains(HighlightFlag.WEIGHT_MATCHES)) {
assertEquals("<b>This is the category field</b>.", snippets[0]);
} else {
assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
}

snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);

if (highlighterFieldMatch.getFlags("category").contains(HighlightFlag.WEIGHT_MATCHES)) {
assertEquals("<b>This is the category field</b>.", snippets[0]);
} else {
assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
}

highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
if (highlighterFieldMatch.getFlags("category").contains(HighlightFlag.WEIGHT_MATCHES)) {
assertEquals("<b>This is</b> the category field.", snippets[0]);
} else {
assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
}
highlighterFieldMatch.setFieldMatcher(null);
}
ir.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Set;

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.lucene.analysis.MockAnalyzer;
Expand Down Expand Up @@ -51,6 +52,7 @@
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter.HighlightFlag;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
Expand Down Expand Up @@ -137,7 +139,7 @@ public void testBasics() throws IOException {
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
String[] snippets = highlighter.highlight("body", query, topDocs);

assertArrayEquals(new String[]{"yin alone, <b>Yin</b> <b>yang</b>, yin gap yang"}, snippets);
assertArrayEquals(new String[]{"yin alone, <b>Yin yang</b>, yin gap yang"}, snippets);
}

public void testWithSameTermQuery() throws IOException {
Expand All @@ -154,7 +156,7 @@ public void testWithSameTermQuery() throws IOException {
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
String[] snippets = highlighter.highlight("body", query, topDocs);

assertArrayEquals(new String[]{"<b>Yin</b> <b>yang</b>, <b>yin</b> gap yang"}, snippets);
assertArrayEquals(new String[]{"<b>Yin yang</b>, <b>yin</b> gap yang"}, snippets);

// test the Passage only has 3 matches. We don't want duplicates from "Yin" being in TermQuery & PhraseQuery.
highlighter.setFormatter(new PassageFormatter() {
Expand Down Expand Up @@ -198,7 +200,7 @@ public void testSubPhrases() throws IOException {
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
String[] snippets = highlighter.highlight("body", query, topDocs);

assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha"}, snippets);
assertArrayEquals(new String[]{"<b>alpha bravo charlie</b> - charlie bravo alpha"}, snippets);

// test the Passage only has 3 matches. We don't want duplicates from both PhraseQuery
highlighter.setFormatter(new PassageFormatter() {
Expand All @@ -223,7 +225,7 @@ public void testSynonyms() throws IOException {
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
String[] snippets = highlighter.highlight("body", query, topDocs);

assertArrayEquals(new String[]{"<b>mother</b> <b>father</b> w <b>mom</b> <b>father</b> w dad"}, snippets);
assertArrayEquals(new String[]{"<b>mother father</b> w <b>mom father</b> w dad"}, snippets);
}

/**
Expand Down Expand Up @@ -251,7 +253,7 @@ public void testRewriteAndMtq() throws IOException {
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
String[] snippets = highlighter.highlight("body", query, topDocs);

assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"},
assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"},
snippets);

// do again, this time with MTQ disabled. We should only find "alpha bravo".
Expand Down Expand Up @@ -287,7 +289,7 @@ public void testRewrite() throws IOException {
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
String[] snippets = highlighter.highlight("body", query, topDocs);

assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha"},
assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> - charlie bravo alpha"},
snippets);

// do again, this time with MTQ disabled. We should only find "alpha bravo".
Expand Down Expand Up @@ -324,7 +326,7 @@ public void testMtq() throws IOException {
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
String[] snippets = highlighter.highlight("body", query, topDocs);

assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"},
assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"},
snippets);

// do again, this time with MTQ disabled.
Expand All @@ -333,6 +335,7 @@ public void testMtq() throws IOException {
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
snippets = highlighter.highlight("body", query, topDocs);

//note: without MTQ, the WEIGHT_MATCHES is disabled which affects the snippet boundaries
assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha"},
snippets);
}
Expand All @@ -350,7 +353,7 @@ public void testMultiValued() throws IOException {
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
String[] snippets = highlighter.highlight("body", query, topDocs, 2);

assertArrayEquals(new String[]{"<b>one</b> <b>bravo</b> three... <b>four</b> <b>bravo</b> six"},
assertArrayEquals(new String[]{"<b>one bravo</b> three... <b>four bravo</b> six"},
snippets);


Expand Down Expand Up @@ -380,7 +383,11 @@ public void testMultiValued() throws IOException {
topDocs = searcher.search(query, 10);
assertEquals(1, topDocs.totalHits);
snippets = highlighter.highlight("body", query, topDocs, 2);
assertEquals("one <b>bravo</b> <b>three</b>... <b>four</b> <b>bravo</b> six", snippets[0]);
if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
assertEquals("one <b>bravo</b> <b>three</b>... four <b>bravo</b> six", snippets[0]);
} else {
assertEquals("one <b>bravo</b> <b>three</b>... <b>four</b> <b>bravo</b> six", snippets[0]);
}
}

public void testMaxLen() throws IOException {
Expand All @@ -389,18 +396,18 @@ public void testMaxLen() throws IOException {
highlighter.setMaxLength(21);

BooleanQuery query = new BooleanQuery.Builder()
.add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.MUST)
.add(newPhraseQuery("body", "gap alpha"), BooleanClause.Occur.MUST)
.add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.SHOULD)
.add(newPhraseQuery("body", "gap alpha"), BooleanClause.Occur.SHOULD)
.add(newPhraseQuery("body", "charlie gap"), BooleanClause.Occur.SHOULD)
.build();

TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
String[] snippets = highlighter.highlight("body", query, topDocs);

if (fieldType == UHTestHelper.reanalysisType) {
assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> charlie -"}, snippets);
if (fieldType == UHTestHelper.reanalysisType || highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
assertArrayEquals(new String[]{"<b>alpha bravo</b> charlie -"}, snippets);
} else {
assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> -"}, snippets);
assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> -"}, snippets);
}
}

Expand Down Expand Up @@ -435,6 +442,13 @@ public void testPreSpanQueryRewrite() throws IOException {
initReaderSearcherHighlighter();

highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected Set<HighlightFlag> getFlags(String field) {
  // Start from the parent's flags for this field and drop WEIGHT_MATCHES, which the original
  // author marked as unsupported for this test's highlighter.
  final Set<HighlightFlag> withoutWeightMatches = super.getFlags(field);
  withoutWeightMatches.remove(HighlightFlag.WEIGHT_MATCHES);
  return withoutWeightMatches;
}

@Override
protected Collection<Query> preSpanQueryRewrite(Query query) {
if (query instanceof MyQuery) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,31 @@
*/
package org.apache.lucene.search.uhighlight;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.*;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.ParallelLeafReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
Expand All @@ -35,13 +53,6 @@
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import java.util.Map;

/**
* Tests highlighting for matters *expressly* relating to term vectors.
* <p>
Expand Down Expand Up @@ -182,7 +193,12 @@ public void testUserFailedToIndexOffsets() throws IOException {
iw.close();

IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
// Returns an empty flag set for every field, so this highlighter runs with no optional
// HighlightFlag enabled (in particular, no WEIGHT_MATCHES).
@Override
protected Set<HighlightFlag> getFlags(String field) {
return Collections.emptySet();// no flags at all, hence no WEIGHT_MATCHES
}
};
TermQuery query = new TermQuery(new Term("body", "vectors"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
try {
Expand Down

0 comments on commit 0e2a579

Please sign in to comment.