Skip to content

Commit

Permalink
Clarify what query components cross_fields supports (#69209)
Browse files Browse the repository at this point in the history
The parsing logic for the `cross_fields` mode was very general, making it hard
to tell what query components it actually supports. This PR clarifies that only
'bag of words' queries are supported, it does not accept phrase or prefix
queries. It also renames `BlendedQueryBuilder` -> `CrossFieldsQueryBuilder` for
clarity.
  • Loading branch information
jtibshirani committed Feb 18, 2021
1 parent 97389a5 commit cea85ed
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ public Query parse(Type type, String fieldName, Object value) throws IOException
assert analyzer != null;

MatchQueryBuilder builder = new MatchQueryBuilder(analyzer, fieldType, enablePositionIncrements, autoGenerateSynonymsPhraseQuery);
String resolvedFieldName = fieldType.name();
String stringValue = value.toString();

/*
* If a keyword analyzer is used, we know that further analysis isn't
Expand All @@ -262,7 +264,7 @@ public Query parse(Type type, String fieldName, Object value) throws IOException
* a prefix query instead
*/
if (analyzer == Lucene.KEYWORD_ANALYZER && type != Type.PHRASE_PREFIX) {
final Term term = new Term(fieldType.name(), value.toString());
final Term term = new Term(resolvedFieldName, stringValue);
if (type == Type.BOOLEAN_PREFIX
&& (fieldType instanceof TextFieldMapper.TextFieldType || fieldType instanceof KeywordFieldMapper.KeywordFieldType)) {
return builder.newPrefixQuery(term);
Expand All @@ -271,11 +273,7 @@ public Query parse(Type type, String fieldName, Object value) throws IOException
}
}

return parseInternal(type, fieldType.name(), builder, value);
}

protected final Query parseInternal(Type type, String fieldName, MatchQueryBuilder builder, Object value) throws IOException {
final Query query;
Query query;
switch (type) {
case BOOLEAN:
if (commonTermsCutoff == null) {
Expand All @@ -284,23 +282,18 @@ protected final Query parseInternal(Type type, String fieldName, MatchQueryBuild
query = createCommonTermsQuery(builder, fieldName, value.toString(), occur, occur, commonTermsCutoff);
}
break;

case BOOLEAN_PREFIX:
query = builder.createBooleanPrefixQuery(fieldName, value.toString(), occur);
query = builder.createBooleanPrefixQuery(resolvedFieldName, stringValue, occur);
break;

case PHRASE:
query = builder.createPhraseQuery(fieldName, value.toString(), phraseSlop);
query = builder.createPhraseQuery(resolvedFieldName, stringValue, phraseSlop);
break;

case PHRASE_PREFIX:
query = builder.createPhrasePrefixQuery(fieldName, value.toString(), phraseSlop);
query = builder.createPhrasePrefixQuery(resolvedFieldName, stringValue, phraseSlop);
break;

default:
throw new IllegalStateException("No type found for [" + type + "]");
}

return query == null ? zeroTermsQuery() : query;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
Expand Down Expand Up @@ -66,7 +67,7 @@ public Query parse(MultiMatchQueryBuilder.Type type, Map<String, Float> fieldNam
break;

case CROSS_FIELDS:
queries = buildCrossFieldQuery(type, fieldNames, value, minimumShouldMatch, tieBreaker);
queries = buildCrossFieldQuery(fieldNames, value, minimumShouldMatch, tieBreaker);
break;

default:
Expand Down Expand Up @@ -108,15 +109,16 @@ private List<Query> buildFieldQueries(MultiMatchQueryBuilder.Type type, Map<Stri
return queries;
}

private List<Query> buildCrossFieldQuery(MultiMatchQueryBuilder.Type type, Map<String, Float> fieldNames,
Object value, String minimumShouldMatch, float tieBreaker) throws IOException {
private List<Query> buildCrossFieldQuery(Map<String, Float> fieldNames,
Object value, String minimumShouldMatch, float tieBreaker) {

Map<Analyzer, List<FieldAndBoost>> groups = new HashMap<>();
List<Query> queries = new ArrayList<>();
for (Map.Entry<String, Float> entry : fieldNames.entrySet()) {
String name = entry.getKey();
MappedFieldType fieldType = context.getFieldType(name);
if (fieldType != null) {
Analyzer actualAnalyzer = getAnalyzer(fieldType, type == MultiMatchQueryBuilder.Type.PHRASE);
Analyzer actualAnalyzer = getAnalyzer(fieldType, false);
if (groups.containsKey(actualAnalyzer) == false) {
groups.put(actualAnalyzer, new ArrayList<>());
}
Expand All @@ -130,7 +132,7 @@ private List<Query> buildCrossFieldQuery(MultiMatchQueryBuilder.Type type, Map<S
builder = new MatchQueryBuilder(group.getKey(), group.getValue().get(0).fieldType,
enablePositionIncrements, autoGenerateSynonymsPhraseQuery);
} else {
builder = new BlendedQueryBuilder(group.getKey(), group.getValue(), tieBreaker,
builder = new CrossFieldsQueryBuilder(group.getKey(), group.getValue(), tieBreaker,
enablePositionIncrements, autoGenerateSynonymsPhraseQuery);
}

Expand All @@ -140,7 +142,11 @@ private List<Query> buildCrossFieldQuery(MultiMatchQueryBuilder.Type type, Map<S
* fields are already grouped by their analyzers/types.
*/
String representativeField = group.getValue().get(0).fieldType.name();
Query query = parseInternal(type.matchQueryType(), representativeField, builder, value);
Query query = builder.createBooleanQuery(representativeField, value.toString(), occur);
if (query == null) {
query = zeroTermsQuery();
}

query = Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch);
if (query != null) {
if (group.getValue().size() == 1) {
Expand All @@ -157,17 +163,32 @@ private List<Query> buildCrossFieldQuery(MultiMatchQueryBuilder.Type type, Map<S
return queries;
}

private class BlendedQueryBuilder extends MatchQueryBuilder {
private class CrossFieldsQueryBuilder extends MatchQueryBuilder {
private final List<FieldAndBoost> blendedFields;
private final float tieBreaker;

BlendedQueryBuilder(Analyzer analyzer, List<FieldAndBoost> blendedFields, float tieBreaker,
CrossFieldsQueryBuilder(Analyzer analyzer, List<FieldAndBoost> blendedFields, float tieBreaker,
boolean enablePositionIncrements, boolean autoGenerateSynonymsPhraseQuery) {
super(analyzer, blendedFields.get(0).fieldType, enablePositionIncrements, autoGenerateSynonymsPhraseQuery);
this.blendedFields = blendedFields;
this.tieBreaker = tieBreaker;
}

@Override
public Query createPhraseQuery(String field, String queryText, int phraseSlop) {
throw new IllegalArgumentException("[multi_match] queries in [cross_fields] mode don't support phrases");
}

@Override
protected Query createPhrasePrefixQuery(String field, String queryText, int slop) {
throw new IllegalArgumentException("[multi_match] queries in [cross_fields] mode don't support phrase prefix");
}

@Override
protected Query createBooleanPrefixQuery(String field, String queryText, BooleanClause.Occur occur) {
throw new IllegalArgumentException("[multi_match] queries in [cross_fields] mode don't support boolean prefix");
}

@Override
protected Query newSynonymQuery(TermAndBoost[] terms) {
BytesRef[] values = new BytesRef[terms.length];
Expand All @@ -184,15 +205,7 @@ protected Query newTermQuery(Term term, float boost) {

@Override
protected Query newPrefixQuery(Term term) {
List<Query> disjunctions = new ArrayList<>();
for (FieldAndBoost fieldType : blendedFields) {
Query query = fieldType.fieldType.prefixQuery(term.text(), null, context);
if (fieldType.boost != 1f) {
query = new BoostQuery(query, fieldType.boost);
}
disjunctions.add(query);
}
return new DisjunctionMaxQuery(disjunctions, tieBreaker);
throw new IllegalArgumentException("[multi_match] queries in [cross_fields] mode don't support prefix");
}

@Override
Expand Down

0 comments on commit cea85ed

Please sign in to comment.