Skip to content

Commit

Permalink
Fix cross_fields type on multi_match query with synonyms (#21638)
Browse files Browse the repository at this point in the history
* Fix cross_fields type on multi_match query with synonyms

This change fixes the cross_fields type of the multi_match query when synonyms are involved.
Since 2.x the Lucene query parser creates SynonymQuery for words that appear at the same position.
For simple term query the CrossFieldsQueryBuilder expands the term to all requested fields and creates a BlendedTermQuery.
This change adds the same mechanism for SynonymQuery which otherwise are not expanded to all requested fields.
As a side note I wonder if we should not replace the BlendedTermQuery with the SynonymQuery. They have the same purpose and behave similarly.

Fixes #21633

* Fallback to SynonymQuery for blended terms on a single field
  • Loading branch information
jimczi committed Nov 28, 2016
1 parent df6d91b commit 6b35728
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 34 deletions.
10 changes: 10 additions & 0 deletions core/src/main/java/org/elasticsearch/index/search/MatchQuery.java
Expand Up @@ -31,6 +31,7 @@
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.QueryBuilder;
import org.elasticsearch.ElasticsearchException;
Expand Down Expand Up @@ -302,6 +303,11 @@ protected Query newTermQuery(Term term) {
return blendTermQuery(term, mapper);
}

@Override
protected Query newSynonymQuery(Term[] terms) {
return blendTermsQuery(terms, mapper);
}

public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) {
final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
float boost = 1;
Expand Down Expand Up @@ -358,6 +364,10 @@ public Query createCommonTermsQuery(String field, String queryText, Occur highFr
}
}

protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
return new SynonymQuery(terms);
}

protected Query blendTermQuery(Term term, MappedFieldType fieldType) {
if (fuzziness != null) {
if (fieldType != null) {
Expand Down
Expand Up @@ -158,6 +158,10 @@ public Query blendTerm(Term term, MappedFieldType fieldType) {
return MultiMatchQuery.super.blendTermQuery(term, fieldType);
}

public Query blendTerms(Term[] terms, MappedFieldType fieldType) {
return MultiMatchQuery.super.blendTermsQuery(terms, fieldType);
}

public Query termQuery(MappedFieldType fieldType, Object value) {
return MultiMatchQuery.this.termQuery(fieldType, value, lenient);
}
Expand Down Expand Up @@ -223,6 +227,18 @@ public List<Query> buildGroupedQueries(MultiMatchQueryBuilder.Type type, Map<Str
return queries.isEmpty() ? null : queries;
}

@Override
public Query blendTerms(Term[] terms, MappedFieldType fieldType) {
if (blendedFields == null || blendedFields.length == 1) {
return super.blendTerms(terms, fieldType);
}
BytesRef[] values = new BytesRef[terms.length];
for (int i = 0; i < terms.length; i++) {
values[i] = terms[i].bytes();
}
return MultiMatchQuery.blendTerms(context, values, commonTermsCutoff, tieBreaker, blendedFields);
}

@Override
public Query blendTerm(Term term, MappedFieldType fieldType) {
if (blendedFields == null) {
Expand All @@ -243,44 +259,51 @@ public Query termQuery(MappedFieldType fieldType, Object value) {
}

static Query blendTerm(QueryShardContext context, BytesRef value, Float commonTermsCutoff, float tieBreaker,
FieldAndFieldType... blendedFields) {
return blendTerms(context, new BytesRef[] {value}, commonTermsCutoff, tieBreaker, blendedFields);
}

static Query blendTerms(QueryShardContext context, BytesRef[] values, Float commonTermsCutoff, float tieBreaker,
FieldAndFieldType... blendedFields) {
List<Query> queries = new ArrayList<>();
Term[] terms = new Term[blendedFields.length];
float[] blendedBoost = new float[blendedFields.length];
Term[] terms = new Term[blendedFields.length * values.length];
float[] blendedBoost = new float[blendedFields.length * values.length];
int i = 0;
for (FieldAndFieldType ft : blendedFields) {
Query query;
try {
query = ft.fieldType.termQuery(value, context);
} catch (IllegalArgumentException e) {
// the query expects a certain class of values such as numbers
// of ip addresses and the value can't be parsed, so ignore this
// field
continue;
} catch (ElasticsearchParseException parseException) {
// date fields throw an ElasticsearchParseException with the
// underlying IAE as the cause, ignore this field if that is
// the case
if (parseException.getCause() instanceof IllegalArgumentException) {
for (BytesRef term : values) {
Query query;
try {
query = ft.fieldType.termQuery(term, context);
} catch (IllegalArgumentException e) {
// the query expects a certain class of values such as numbers
// of ip addresses and the value can't be parsed, so ignore this
// field
continue;
} catch (ElasticsearchParseException parseException) {
// date fields throw an ElasticsearchParseException with the
// underlying IAE as the cause, ignore this field if that is
// the case
if (parseException.getCause() instanceof IllegalArgumentException) {
continue;
}
throw parseException;
}
throw parseException;
}
float boost = ft.boost;
while (query instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) query;
query = bq.getQuery();
boost *= bq.getBoost();
}
if (query.getClass() == TermQuery.class) {
terms[i] = ((TermQuery) query).getTerm();
blendedBoost[i] = boost;
i++;
} else {
if (boost != 1f) {
query = new BoostQuery(query, boost);
float boost = ft.boost;
while (query instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) query;
query = bq.getQuery();
boost *= bq.getBoost();
}
if (query.getClass() == TermQuery.class) {
terms[i] = ((TermQuery) query).getTerm();
blendedBoost[i] = boost;
i++;
} else {
if (boost != 1f) {
query = new BoostQuery(query, boost);
}
queries.add(query);
}
queries.add(query);
}
}
if (i > 0) {
Expand Down Expand Up @@ -317,6 +340,14 @@ protected Query blendTermQuery(Term term, MappedFieldType fieldType) {
return queryBuilder.blendTerm(term, fieldType);
}

@Override
protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
if (queryBuilder == null) {
return super.blendTermsQuery(terms, fieldType);
}
return queryBuilder.blendTerms(terms, fieldType);
}

static final class FieldAndFieldType {
final MappedFieldType fieldType;
final float boost;
Expand Down
Expand Up @@ -28,10 +28,12 @@
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.mapper.MapperService;
Expand All @@ -55,18 +57,25 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {

@Before
public void setup() throws IOException {
IndexService indexService = createIndex("test");
Settings settings = Settings.builder()
.put("index.analysis.filter.syns.type","synonym")
.putArray("index.analysis.filter.syns.synonyms","quick,fast")
.put("index.analysis.analyzer.syns.tokenizer","standard")
.put("index.analysis.analyzer.syns.filter","syns").build();
IndexService indexService = createIndex("test", settings);
MapperService mapperService = indexService.mapperService();
String mapping = "{\n" +
" \"person\":{\n" +
" \"properties\":{\n" +
" \"name\":{\n" +
" \"properties\":{\n" +
" \"first\": {\n" +
" \"type\":\"text\"\n" +
" \"type\":\"text\",\n" +
" \"analyzer\":\"syns\"\n" +
" }," +
" \"last\": {\n" +
" \"type\":\"text\"\n" +
" \"type\":\"text\",\n" +
" \"analyzer\":\"syns\"\n" +
" }" +
" }" +
" }\n" +
Expand Down Expand Up @@ -176,4 +185,34 @@ public void testMultiMatchPrefixWithAllField() throws IOException {
assertThat(parsedQuery, instanceOf(MultiPhrasePrefixQuery.class));
assertThat(parsedQuery.toString(), equalTo("_all:\"foo*\""));
}

public void testMultiMatchCrossFieldsWithSynonyms() throws IOException {
QueryShardContext queryShardContext = indexService.newQueryShardContext(
randomInt(20), null, () -> { throw new UnsupportedOperationException(); });

// check that synonym query is used for a single field
Query parsedQuery =
multiMatchQuery("quick").field("name.first")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext);
Term[] terms = new Term[2];
terms[0] = new Term("name.first", "quick");
terms[1] = new Term("name.first", "fast");
Query expectedQuery = new SynonymQuery(terms);
assertThat(parsedQuery, equalTo(expectedQuery));

// check that blended term query is used for multiple fields
parsedQuery =
multiMatchQuery("quick").field("name.first").field("name.last")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext);
terms = new Term[4];
terms[0] = new Term("name.first", "quick");
terms[1] = new Term("name.first", "fast");
terms[2] = new Term("name.last", "quick");
terms[3] = new Term("name.last", "fast");
float[] boosts = new float[4];
Arrays.fill(boosts, 1.0f);
expectedQuery = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f);
assertThat(parsedQuery, equalTo(expectedQuery));

}
}

0 comments on commit 6b35728

Please sign in to comment.