Skip to content

Commit

Permalink
Add term query support to rank_features mapped field (#93247)
Browse files Browse the repository at this point in the history
This adds term query capabilities for rank_features fields. Term queries against rank_features fields are not scored the same way as term queries against regular fields. This is because the stored feature values are encoded using the term frequency storage mechanism, so regular BM25 scoring does not apply.

Instead, a term query against a rank_features field behaves much like a linear rank_feature query. If more complicated combinations of features and values are required, the rank_feature query should be used.
  • Loading branch information
benwtrent committed Feb 1, 2023
1 parent 7f9f3bc commit 323a13a
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 7 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/93247.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 93247
summary: Add `term` query support to `rank_features` mapped field
area: Search
type: enhancement
issues: []
5 changes: 4 additions & 1 deletion docs/reference/mapping/types/rank-features.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ NOTE: `rank_features` fields only support single-valued features and strictly
positive values. Multi-valued fields and zero or negative values will be rejected.

NOTE: `rank_features` fields do not support sorting or aggregating and may
only be queried using <<query-dsl-rank-feature-query,`rank_feature`>> queries.
only be queried using <<query-dsl-rank-feature-query,`rank_feature`>> or <<query-dsl-term-query, `term`>> queries.

NOTE: <<query-dsl-term-query, `term`>> queries on `rank_features` fields are scored by multiplying the matched
stored feature value by the provided `boost`.

NOTE: `rank_features` fields only preserve 9 significant bits for the
precision, which translates to a relative error of about 0.4%.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.index.mapper;

import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESIntegTestCase;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;

import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.equalTo;

/**
 * Integration test for {@code term} queries against a {@code rank_features} field.
 *
 * <p>Three documents are indexed: {@code all} (both features), {@code lower}
 * (only {@link #LOWER_RANKED_FEATURE} = 10) and {@code higher}
 * (only {@link #HIGHER_RANKED_FEATURE} = 20). The test then checks that a term
 * query on a feature name matches the documents carrying that feature and that
 * the hit score equals the stored feature value multiplied by the query boost.
 */
public class RankFeaturesMapperIntegrationIT extends ESIntegTestCase {

    private static final String LOWER_RANKED_FEATURE = "ten";
    private static final String HIGHER_RANKED_FEATURE = "twenty";
    private static final String INDEX_NAME = "rank_feature_test";
    private static final String FIELD_NAME = "all_rank_features";

    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return Arrays.asList(MapperExtrasPlugin.class);
    }

    /**
     * Exercises plain, boosted, combined (bool/should) and non-matching term
     * queries against the {@code rank_features} field.
     *
     * @throws IOException if building the index mapping fails
     */
    public void testRankFeaturesTermQuery() throws IOException {
        init();

        // Un-boosted query: score is the stored feature value (20).
        SearchResponse response = client().prepareSearch(INDEX_NAME)
            .setQuery(QueryBuilders.termQuery(FIELD_NAME, HIGHER_RANKED_FEATURE))
            .get();
        assertThat(response.getHits().getTotalHits().value, equalTo(2L));
        for (SearchHit hit : response.getHits().getHits()) {
            assertThat(hit.getScore(), equalTo(20f));
        }

        // Boosted query: score is feature value * boost (20 * 100).
        response = client().prepareSearch(INDEX_NAME)
            .setQuery(QueryBuilders.termQuery(FIELD_NAME, HIGHER_RANKED_FEATURE).boost(100f))
            .get();
        assertThat(response.getHits().getTotalHits().value, equalTo(2L));
        for (SearchHit hit : response.getHits().getHits()) {
            assertThat(hit.getScore(), equalTo(2000f));
        }

        // Two should clauses: scores of the matching clauses add up per document.
        response = client().prepareSearch(INDEX_NAME)
            .setQuery(
                QueryBuilders.boolQuery()
                    .should(QueryBuilders.termQuery(FIELD_NAME, HIGHER_RANKED_FEATURE))
                    .should(QueryBuilders.termQuery(FIELD_NAME, LOWER_RANKED_FEATURE).boost(3f))
                    .minimumShouldMatch(1)
            )
            .get();
        assertThat(response.getHits().getTotalHits().value, equalTo(3L));
        // "all": 20 + 10 * 3, "lower": 10 * 3, "higher": 20
        Map<String, Float> expectedScores = Map.of("all", 50f, "lower", 30f, "higher", 20f);
        for (SearchHit hit : response.getHits().getHits()) {
            Float expectedScore = expectedScores.get(hit.getId());
            // Fail loudly on an unknown id instead of silently skipping the score check.
            assertNotNull("unexpected hit id [" + hit.getId() + "]", expectedScore);
            assertThat(hit.getScore(), equalTo(expectedScore));
        }

        // A feature name that no document carries matches nothing.
        response = client().prepareSearch(INDEX_NAME).setQuery(QueryBuilders.termQuery(FIELD_NAME, "missing_feature")).get();
        assertThat(response.getHits().getTotalHits().value, equalTo(0L));
    }

    /**
     * Creates {@link #INDEX_NAME} with {@link #FIELD_NAME} mapped as
     * {@code rank_features}, bulk-indexes the three test documents and
     * refreshes so they are visible to search.
     *
     * @throws IOException if building the index mapping fails
     */
    private void init() throws IOException {
        Settings.Builder settings = Settings.builder();
        settings.put(indexSettings());
        prepareCreate(INDEX_NAME).setSettings(settings)
            .setMapping(
                jsonBuilder().startObject()
                    .startObject("_doc")
                    .startObject("properties")
                    .startObject(FIELD_NAME)
                    .field("type", "rank_features")
                    .endObject()
                    .endObject()
                    .endObject()
                    .endObject()
            )
            .get();
        ensureGreen();

        BulkResponse bulk = client().prepareBulk()
            .add(
                client().prepareIndex(INDEX_NAME)
                    .setId("all")
                    .setSource(Map.of(FIELD_NAME, Map.of(LOWER_RANKED_FEATURE, 10, HIGHER_RANKED_FEATURE, 20)))
            )
            .add(client().prepareIndex(INDEX_NAME).setId("lower").setSource(Map.of(FIELD_NAME, Map.of(LOWER_RANKED_FEATURE, 10))))
            .add(client().prepareIndex(INDEX_NAME).setId("higher").setSource(Map.of(FIELD_NAME, Map.of(HIGHER_RANKED_FEATURE, 20))))
            .get();
        assertFalse(bulk.buildFailureMessage(), bulk.hasFailures());
        assertThat(refresh().getFailedShards(), equalTo(0));
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import org.apache.lucene.document.FeatureField;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.FieldDataContext;
Expand All @@ -27,6 +28,8 @@
import java.io.IOException;
import java.util.Map;

import static org.elasticsearch.index.query.AbstractQueryBuilder.DEFAULT_BOOST;

/**
* A {@link FieldMapper} that exposes Lucene's {@link FeatureField} as a sparse
* vector of features.
Expand Down Expand Up @@ -77,7 +80,7 @@ public static final class RankFeaturesFieldType extends MappedFieldType {
private final boolean positiveScoreImpact;

public RankFeaturesFieldType(String name, Map<String, String> meta, boolean positiveScoreImpact) {
super(name, false, false, false, TextSearchInfo.NONE, meta);
super(name, true, false, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
this.positiveScoreImpact = positiveScoreImpact;
}

Expand All @@ -86,10 +89,6 @@ public String typeName() {
return CONTENT_TYPE;
}

public boolean positiveScoreImpact() {
return positiveScoreImpact;
}

@Override
public Query existsQuery(SearchExecutionContext context) {
throw new IllegalArgumentException("[rank_features] fields do not support [exists] queries");
Expand All @@ -107,7 +106,14 @@ public ValueFetcher valueFetcher(SearchExecutionContext context, String format)

@Override
public Query termQuery(Object value, SearchExecutionContext context) {
throw new IllegalArgumentException("Queries on [rank_features] fields are not supported");
return FeatureField.newLinearQuery(name(), indexedValueForSearch(value), DEFAULT_BOOST);
}

/**
 * Converts a query value into its string form: a {@link BytesRef} is
 * decoded with {@code utf8ToString()}, any other object falls back to
 * {@code toString()}.
 */
private static String indexedValueForSearch(Object value) {
    return value instanceof BytesRef ? ((BytesRef) value).utf8ToString() : value.toString();
}
}

Expand Down

0 comments on commit 323a13a

Please sign in to comment.