Skip to content

Commit

Permalink
- Added support for multi match query.
Browse files Browse the repository at this point in the history
  • Loading branch information
martijnvg authored and kimchy committed Aug 9, 2012
1 parent d049313 commit d744300
Show file tree
Hide file tree
Showing 7 changed files with 526 additions and 14 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.query;

import org.elasticsearch.common.xcontent.XContentBuilder;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;

/**
* Same as {@link MatchQueryBuilder} but supports multiple fields.
*/
public class MultiMatchQueryBuilder extends BaseQueryBuilder implements BoostableQueryBuilder<MultiMatchQueryBuilder> {

    // Query text; written as the "query" field.
    private final Object text;

    // Field names (optionally carrying a "^boost" suffix understood by the parser); written as "fields".
    private final List<String> fields;

    // All settings below are optional: a null value means "not set" and the field is omitted from the output.
    private MatchQueryBuilder.Type type;

    private MatchQueryBuilder.Operator operator;

    private String analyzer;

    private Float boost;

    private Integer slop;

    private String fuzziness;

    private Integer prefixLength;

    private Integer maxExpansions;

    private String minimumShouldMatch;

    private String rewrite = null;

    private String fuzzyRewrite = null;

    private Boolean useDisMax;

    private Integer tieBreaker;

    /**
     * Constructs a new multi match query.
     *
     * @param text   the query text, written as the <tt>query</tt> field
     * @param fields the names of the fields to run the query against, written as the <tt>fields</tt> array
     */
    public MultiMatchQueryBuilder(Object text, String... fields) {
        // Arrays.asList is a write-through view of its argument; clone the varargs array so that
        // later modifications of the caller's array cannot change this builder.
        this.fields = Arrays.asList(fields.clone());
        this.text = text;
    }

    /**
     * Sets the type of the multi match query.
     */
    public MultiMatchQueryBuilder type(MatchQueryBuilder.Type type) {
        this.type = type;
        return this;
    }

    /**
     * Sets the operator to use when using a boolean query. Defaults to <tt>OR</tt>.
     */
    public MultiMatchQueryBuilder operator(MatchQueryBuilder.Operator operator) {
        this.operator = operator;
        return this;
    }

    /**
     * Explicitly set the analyzer to use. Defaults to use explicit mapping config for the field, or, if not
     * set, the default search analyzer.
     */
    public MultiMatchQueryBuilder analyzer(String analyzer) {
        this.analyzer = analyzer;
        return this;
    }

    /**
     * Set the boost to apply to the query.
     */
    public MultiMatchQueryBuilder boost(float boost) {
        this.boost = boost;
        return this;
    }

    /**
     * Set the phrase slop if evaluated to a phrase query type.
     */
    public MultiMatchQueryBuilder slop(int slop) {
        this.slop = slop;
        return this;
    }

    /**
     * Sets the minimum similarity used when evaluated to a fuzzy query type. Defaults to "0.5".
     *
     * @param fuzziness the fuzziness value, stored via its {@code toString()}; <tt>null</tt> clears the setting
     */
    public MultiMatchQueryBuilder fuzziness(Object fuzziness) {
        // Treat null as "unset", like the other nullable setters, instead of failing with an NPE.
        this.fuzziness = fuzziness == null ? null : fuzziness.toString();
        return this;
    }

    /**
     * Sets the value written as <tt>prefix_length</tt>, which the parser applies as the fuzzy prefix length.
     */
    public MultiMatchQueryBuilder prefixLength(int prefixLength) {
        this.prefixLength = prefixLength;
        return this;
    }

    /**
     * When using fuzzy or prefix type query, the number of term expansions to use. Defaults to unbounded
     * so its recommended to set it to a reasonable value for faster execution.
     */
    public MultiMatchQueryBuilder maxExpansions(int maxExpansions) {
        this.maxExpansions = maxExpansions;
        return this;
    }

    /**
     * Sets the value written as <tt>minimum_should_match</tt>; <tt>null</tt> leaves it unset.
     */
    public MultiMatchQueryBuilder minimumShouldMatch(String minimumShouldMatch) {
        this.minimumShouldMatch = minimumShouldMatch;
        return this;
    }

    /**
     * Sets the value written as <tt>rewrite</tt>; <tt>null</tt> leaves it unset.
     */
    public MultiMatchQueryBuilder rewrite(String rewrite) {
        this.rewrite = rewrite;
        return this;
    }

    /**
     * Sets the value written as <tt>fuzzy_rewrite</tt>; <tt>null</tt> leaves it unset.
     */
    public MultiMatchQueryBuilder fuzzyRewrite(String fuzzyRewrite) {
        this.fuzzyRewrite = fuzzyRewrite;
        return this;
    }

    /**
     * Sets the value written as <tt>use_dis_max</tt>; <tt>null</tt> leaves it unset.
     */
    public MultiMatchQueryBuilder useDisMax(Boolean useDisMax) {
        this.useDisMax = useDisMax;
        return this;
    }

    /**
     * Sets the value written as <tt>tie_breaker</tt>; <tt>null</tt> leaves it unset.
     */
    public MultiMatchQueryBuilder setTieBreaker(Integer tieBreaker) {
        this.tieBreaker = tieBreaker;
        return this;
    }

    /**
     * Writes this query as an object named {@link MultiMatchQueryParser#NAME}. The <tt>query</tt> and
     * <tt>fields</tt> entries are always written; every optional setting is written only when it has been set.
     */
    @Override
    public void doXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject(MultiMatchQueryParser.NAME);

        builder.field("query", text);
        builder.field("fields", fields);

        if (type != null) {
            // The parser compares against lower-case type names ("boolean", "phrase", "phrase_prefix").
            builder.field("type", type.toString().toLowerCase(Locale.ENGLISH));
        }
        if (operator != null) {
            builder.field("operator", operator.toString());
        }
        if (analyzer != null) {
            builder.field("analyzer", analyzer);
        }
        if (boost != null) {
            builder.field("boost", boost);
        }
        if (slop != null) {
            builder.field("slop", slop);
        }
        if (fuzziness != null) {
            builder.field("fuzziness", fuzziness);
        }
        if (prefixLength != null) {
            builder.field("prefix_length", prefixLength);
        }
        if (maxExpansions != null) {
            builder.field("max_expansions", maxExpansions);
        }
        if (minimumShouldMatch != null) {
            builder.field("minimum_should_match", minimumShouldMatch);
        }
        if (rewrite != null) {
            builder.field("rewrite", rewrite);
        }
        if (fuzzyRewrite != null) {
            builder.field("fuzzy_rewrite", fuzzyRewrite);
        }

        if (useDisMax != null) {
            builder.field("use_dis_max", useDisMax);
        }

        if (tieBreaker != null) {
            builder.field("tie_breaker", tieBreaker);
        }

        builder.endObject();
    }
}
180 changes: 180 additions & 0 deletions src/main/java/org/elasticsearch/index/query/MultiMatchQueryParser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.query;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.query.support.QueryParsers;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.index.search.MultiMatchQuery;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
* Same as {@link MatchQueryParser} but has support for multiple fields.
*/
public class MultiMatchQueryParser implements QueryParser {

    /** Primary name under which this query is registered. */
    public static final String NAME = "multi_match";

    @Inject
    public MultiMatchQueryParser() {
    }

    /**
     * Returns the names this parser answers to: {@link #NAME} and its camelCase alias.
     */
    @Override
    public String[] names() {
        return new String[]{
                NAME, "multiMatch"
        };
    }

    /**
     * Parses a multi match query object from the parser held by the given context.
     * <p>
     * The <tt>fields</tt> array and the <tt>query</tt> text are mandatory; all other settings are optional
     * and are applied to the underlying {@link MultiMatchQuery}. When the resulting query is a
     * {@link BooleanQuery}, <tt>minimum_should_match</tt> is applied to it. The <tt>boost</tt>
     * (1.0 unless given) is set on the resulting query.
     *
     * @param parseContext the context supplying the content parser, mapper service and analysis service
     * @return the parsed query
     * @throws IOException           if reading from the underlying parser fails
     * @throws QueryParsingException if an unsupported field or operator is encountered, the analyzer is
     *                               unknown, or the query text or the field list is missing
     */
    @Override
    public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
        XContentParser parser = parseContext.parser();

        String text = null;
        float boost = 1.0f;
        MatchQuery.Type type = MatchQuery.Type.BOOLEAN;
        MultiMatchQuery multiMatchQuery = new MultiMatchQuery(parseContext);
        String minimumShouldMatch = null;
        List<String> fieldNames = Lists.newArrayList();
        // NOTE(review): per-field boosts are collected here but never handed to MultiMatchQuery,
        // so a "field^boost" entry currently only has its boost suffix stripped - confirm intent.
        Map<String, Float> fieldNameToBoost = Maps.newHashMap();

        XContentParser.Token token;
        String currentFieldName = null;
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                currentFieldName = parser.currentName();
            } else if (token == XContentParser.Token.START_ARRAY) {
                if ("fields".equals(currentFieldName)) {
                    parseFields(parseContext, parser, fieldNames, fieldNameToBoost);
                } else {
                    throw new QueryParsingException(parseContext.index(), "[" + NAME + "] query does not support [" + currentFieldName + "]");
                }
            } else if (token.isValue()) {
                if ("query".equals(currentFieldName)) {
                    text = parser.text();
                } else if ("type".equals(currentFieldName)) {
                    String tStr = parser.text();
                    if ("boolean".equals(tStr)) {
                        type = MatchQuery.Type.BOOLEAN;
                    } else if ("phrase".equals(tStr)) {
                        type = MatchQuery.Type.PHRASE;
                    } else if ("phrase_prefix".equals(tStr) || "phrasePrefix".equals(tStr)) {
                        // The camelCase alias must be compared against the value, not the field name
                        // (which is always "type" in this branch).
                        type = MatchQuery.Type.PHRASE_PREFIX;
                    }
                    // NOTE(review): an unrecognised type is silently ignored and the previous value kept,
                    // whereas an unrecognised operator is rejected - consider rejecting here as well.
                } else if ("analyzer".equals(currentFieldName)) {
                    String analyzer = parser.text();
                    if (parseContext.analysisService().analyzer(analyzer) == null) {
                        throw new QueryParsingException(parseContext.index(), "[" + NAME + "] analyzer [" + analyzer + "] not found");
                    }
                    multiMatchQuery.setAnalyzer(analyzer);
                } else if ("boost".equals(currentFieldName)) {
                    boost = parser.floatValue();
                } else if ("slop".equals(currentFieldName) || "phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) {
                    multiMatchQuery.setPhraseSlop(parser.intValue());
                } else if ("fuzziness".equals(currentFieldName)) {
                    multiMatchQuery.setFuzziness(parser.textOrNull());
                } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) {
                    multiMatchQuery.setFuzzyPrefixLength(parser.intValue());
                } else if ("max_expansions".equals(currentFieldName) || "maxExpansions".equals(currentFieldName)) {
                    multiMatchQuery.setMaxExpansions(parser.intValue());
                } else if ("operator".equals(currentFieldName)) {
                    String op = parser.text();
                    if ("or".equalsIgnoreCase(op)) {
                        multiMatchQuery.setOccur(BooleanClause.Occur.SHOULD);
                    } else if ("and".equalsIgnoreCase(op)) {
                        multiMatchQuery.setOccur(BooleanClause.Occur.MUST);
                    } else {
                        throw new QueryParsingException(parseContext.index(), "[" + NAME + "] query requires operator to be either 'and' or 'or', not [" + op + "]");
                    }
                } else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
                    minimumShouldMatch = parser.textOrNull();
                } else if ("rewrite".equals(currentFieldName)) {
                    multiMatchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
                } else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) {
                    multiMatchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
                } else if ("use_dis_max".equals(currentFieldName) || "useDisMax".equals(currentFieldName)) {
                    multiMatchQuery.setUseDisMax(parser.booleanValue());
                } else if ("tie_breaker".equals(currentFieldName) || "tieBreaker".equals(currentFieldName)) {
                    multiMatchQuery.setTieBreaker(parser.intValue());
                } else {
                    throw new QueryParsingException(parseContext.index(), "[" + NAME + "] query does not support [" + currentFieldName + "]");
                }
            }
        }

        if (text == null) {
            throw new QueryParsingException(parseContext.index(), "No text specified for " + NAME + " query");
        }

        if (fieldNames.isEmpty()) {
            throw new QueryParsingException(parseContext.index(), "No fields specified for " + NAME + " query");
        }

        Query query = multiMatchQuery.parse(type, fieldNames, text);

        if (query instanceof BooleanQuery) {
            Queries.applyMinimumShouldMatch((BooleanQuery) query, minimumShouldMatch);
        }

        query.setBoost(boost);
        return query;
    }

    /**
     * Reads the entries of the <tt>fields</tt> array up to and including its end token. Each entry is either
     * a plain field name or <tt>name^boost</tt>; names that are simple match patterns are expanded through
     * the mapper service. Resolved names are appended to {@code fieldNames} and explicit boosts are recorded
     * in {@code fieldNameToBoost}.
     */
    private static void parseFields(QueryParseContext parseContext, XContentParser parser, List<String> fieldNames,
                                    Map<String, Float> fieldNameToBoost) throws IOException {
        while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
            String fField = null;
            // -1 is the sentinel for "no boost given on this entry".
            float fBoost = -1;
            char[] fieldText = parser.textCharacters();
            int offset = parser.textOffset();
            int length = parser.textLength();
            int end = offset + length;
            for (int i = offset; i < end; i++) {
                if (fieldText[i] == '^') {
                    // Everything before the first '^' is the field name, everything after it the boost.
                    int relativeLocation = i - offset;
                    fField = new String(fieldText, offset, relativeLocation);
                    fBoost = Float.parseFloat(new String(fieldText, i + 1, length - relativeLocation - 1));
                    break;
                }
            }
            if (fField == null) {
                fField = parser.text();
            }

            if (Regex.isSimpleMatchPattern(fField)) {
                for (String field : parseContext.mapperService().simpleMatchToIndexNames(fField)) {
                    fieldNames.add(field);
                    if (fBoost != -1) {
                        fieldNameToBoost.put(field, fBoost);
                    }
                }
            } else {
                fieldNames.add(fField);
                if (fBoost != -1) {
                    fieldNameToBoost.put(fField, fBoost);
                }
            }
        }
    }
}

0 comments on commit d744300

Please sign in to comment.