Permalink
Browse files

Stop FVH from throwing away some query boosts

The FVH was throwing away some boosts on queries, which stopped a number
of ways of boosting phrase matches to the top of the list of fragments
from working.

The plain highlighter also doesn't work for this but that is because it
doesn't support the concept of the same term having a different score at
different positions.

Also update the documentation to claim that the FVH is nicer for weighting
terms found by query combinations.

Closes #4351
  • Loading branch information...
nik9000 authored and jpountz committed Dec 5, 2013
1 parent 522d620 commit 8bd9e34e39eb586f1180c868f8b07b8c2b2cbdf2
@@ -77,6 +77,9 @@ will be used instead of the plain highlighter. The fast vector highlighter:
increases the size of the index
* Can combine matches from multiple fields into one result. See
`matched_fields`
* Can assign different weights to matches at different positions, allowing
for things like phrase matches being sorted above term matches when
highlighting a Boosting Query that boosts phrase matches over term matches.
Here is an example of setting the `content` field to allow for
highlighting using the fast vector highlighter on it (this will cause
@@ -25,6 +25,8 @@
import org.apache.lucene.queries.TermFilter;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.lucene.search.XBooleanFilter;
import org.elasticsearch.common.lucene.search.XFilteredQuery;
@@ -66,10 +68,36 @@ public CustomFieldQuery(Query query, IndexReader reader, boolean phraseHighlight
@Override
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries) throws IOException {
if (sourceQuery instanceof DisjunctionMaxQuery) {
assert Lucene.VERSION == Version.LUCENE_46 : "LUCENE-5361";
if( sourceQuery instanceof BooleanQuery ){
BooleanQuery bq = (BooleanQuery)sourceQuery;
if (bq.getBoost() == 1) {
for( BooleanClause clause : bq.getClauses() ) {
if(!clause.isProhibited()) {
flatten(clause.getQuery(), reader, flatQueries);
}
}
} else {
for( BooleanClause clause : bq.getClauses() ) {
if(!clause.isProhibited()) {
Query cloned = clause.getQuery().clone();
cloned.setBoost(cloned.getBoost() * bq.getBoost());
flatten(cloned, reader, flatQueries);
}
}
}
} else if (sourceQuery instanceof DisjunctionMaxQuery) {
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
for (Query query : dmq) {
flatten(query, reader, flatQueries);
if (dmq.getBoost() == 1) {
for (Query query : dmq) {
flatten(query, reader, flatQueries);
}
} else {
for (Query query : dmq) {
Query clone = query.clone();
clone.setBoost(clone.getBoost() * dmq.getBoost());
flatten(clone, reader, flatQueries);
}
}
} else if (sourceQuery instanceof SpanTermQuery) {
TermQuery termQuery = new TermQuery(((SpanTermQuery) sourceQuery).getTerm());
@@ -2494,4 +2494,82 @@ public void testPlainHighlighterCustomIndexName() {
.addHighlightedField("field1").setHighlighterRequireFieldMatch(true).get();
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("<em>First</em> sentence. Second sentence."));
}
@Test
public void testFastVectorHighlighterPhraseBoost() throws Exception {
    // Run the shared phrase-boost scenario with the "fvh" highlighter type against
    // an index created with the mapping returned by type1TermVectorMapping().
    final String highlighterType = "fvh";
    assertAcked(client().admin().indices().prepareCreate("test")
            .addMapping("type1", type1TermVectorMapping()));
    phraseBoostTestCase(highlighterType);
}
@Test
public void testPostingsHighlighterPhraseBoost() throws Exception {
    // Run the shared phrase-boost scenario with the "postings" highlighter type against
    // an index created with the mapping returned by type1PostingsffsetsMapping().
    final String highlighterType = "postings";
    assertAcked(client().admin().indices().prepareCreate("test")
            .addMapping("type1", type1PostingsffsetsMapping()));
    phraseBoostTestCase(highlighterType);
}
/**
 * Checks that a boosted phrase match is highlighted ahead of plain term matches.
 * <p>
 * A single document is indexed in which the words "highlight", "words" and "together"
 * appear scattered across the first line and adjacent to each other on a later line,
 * with filler lines in between. Several term/phrase query pairs are then handed to
 * {@code phraseBoostTestCaseForClauses}.
 * <p>
 * Note that this will never pass with the plain highlighter because it doesn't support
 * the concept of terms having a different weight based on position.
 *
 * @param highlighterType highlighter to test
 */
private void phraseBoostTestCase(String highlighterType) {
    ensureGreen();

    // Filler line repeated before and after the line that contains the actual phrase.
    final String fillerLine =
            "junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk\n";

    StringBuilder document = new StringBuilder();
    // First line: all three terms are present, but separated by filler words.
    document.append("words words junk junk junk junk junk junk junk junk highlight junk junk junk junk together junk\n");
    for (int line = 0; line < 10; line++) {
        document.append(fillerLine);
    }
    // The line the phrase query matches.
    document.append("highlight words together\n");
    for (int line = 0; line < 10; line++) {
        document.append(fillerLine);
    }
    index("test", "type1", "1", "field1", document.toString());
    refresh();

    // Match queries
    phraseBoostTestCaseForClauses(
            highlighterType,
            100f,
            matchQuery("field1", "highlight words together"),
            matchPhraseQuery("field1", "highlight words together"));

    // Query string with a single field
    phraseBoostTestCaseForClauses(
            highlighterType,
            100f,
            queryString("highlight words together").field("field1"),
            queryString("\"highlight words together\"").field("field1").autoGeneratePhraseQueries(true));

    // Query string with a single field without dismax
    phraseBoostTestCaseForClauses(
            highlighterType,
            100f,
            queryString("highlight words together").field("field1").useDisMax(false),
            queryString("\"highlight words together\"").field("field1").useDisMax(false).autoGeneratePhraseQueries(true));

    // Query string with more than one field
    phraseBoostTestCaseForClauses(
            highlighterType,
            100f,
            queryString("highlight words together").field("field1").field("field2"),
            queryString("\"highlight words together\"").field("field1").field("field2").autoGeneratePhraseQueries(true));

    // Query string boosting the field
    phraseBoostTestCaseForClauses(
            highlighterType,
            1f,
            queryString("highlight words together").field("field1"),
            queryString("\"highlight words together\"").field("field1^100").autoGeneratePhraseQueries(true));
}
/**
 * Runs one term/phrase query pair through three query shapes and asserts, for each,
 * that the first returned fragment of {@code field1} contains the highlighted phrase.
 *
 * @param highlighterType highlighter type set on the search request
 * @param boost boost applied to the phrase side (or, inverted, as the negative boost)
 * @param terms query matching the individual terms
 * @param phrase query matching the phrase; its boost is modified by this method
 */
private <P extends QueryBuilder & BoostableQueryBuilder> void
        phraseBoostTestCaseForClauses(String highlighterType, float boost, QueryBuilder terms, P phrase) {
    // The phrase may be wrapped in a single tag or in one tag per term.
    Matcher<String> phraseIsHighlighted = either(containsString("<em>highlight words together</em>")).or(
            containsString("<em>highlight</em> <em>words</em> <em>together</em>"));

    // Fragments are ordered by score; a single fragment of size 100 is requested.
    SearchRequestBuilder request = client().prepareSearch("test")
            .setHighlighterRequireFieldMatch(true)
            .setHighlighterOrder("score")
            .setHighlighterType(highlighterType)
            .addHighlightedField("field1", 100, 1);

    // Try with a bool query
    phrase.boost(boost);
    QueryBuilder boolShape = boolQuery().must(terms).should(phrase);
    SearchResponse boolResponse = request.setQuery(boolShape).get();
    assertHighlight(boolResponse, 0, "field1", 0, 1, phraseIsHighlighted);
    // Reset the phrase boost so the remaining cases rely only on the boosting query's settings.
    phrase.boost(1);

    // Try with a boosting query
    QueryBuilder positiveBoostShape = boostingQuery().positive(phrase).negative(terms).boost(boost).negativeBoost(1);
    SearchResponse positiveBoostResponse = request.setQuery(positiveBoostShape).get();
    assertHighlight(positiveBoostResponse, 0, "field1", 0, 1, phraseIsHighlighted);

    // Try with a boosting query using a negative boost
    QueryBuilder negativeBoostShape = boostingQuery().positive(phrase).negative(terms).boost(1).negativeBoost(1 / boost);
    SearchResponse negativeBoostResponse = request.setQuery(negativeBoostShape).get();
    assertHighlight(negativeBoostResponse, 0, "field1", 0, 1, phraseIsHighlighted);
}
}

0 comments on commit 8bd9e34

Please sign in to comment.