Add term query support to rank_features mapped field (#93247)

This adds term query capabilities for rank_features fields. Term queries against rank_features fields are not scored in the same way as term queries against regular fields. This is because the stored feature values take advantage of the term frequency storage mechanism, and thus regular BM25 does not work. Instead, a term query against a rank_features field is very similar to a linear rank_feature query. If more complicated combinations of features and values are required, the rank_feature query should be used.
elastic · Feb 1, 2023 · 323a13a · 323a13a
1 parent 7f9f3bc
commit 323a13a
Show file tree

Hide file tree

Showing 4 changed files with 137 additions and 7 deletions.
diff --git a/docs/changelog/93247.yaml b/docs/changelog/93247.yaml
@@ -0,0 +1,5 @@
+pr: 93247
+summary: Add `term` query support to `rank_features` mapped field
+area: Search
+type: enhancement
+issues: []
diff --git a/docs/reference/mapping/types/rank-features.asciidoc b/docs/reference/mapping/types/rank-features.asciidoc
@@ -83,7 +83,10 @@ NOTE: `rank_features` fields only support single-valued features and strictly
 positive values. Multi-valued fields and zero or negative values will be rejected.
 
 NOTE: `rank_features` fields do not support sorting or aggregating and may
-only be queried using <<query-dsl-rank-feature-query,`rank_feature`>> queries.
+only be queried using <<query-dsl-rank-feature-query,`rank_feature`>> or <<query-dsl-term-query, `term`>> queries.
+
+NOTE: <<query-dsl-term-query, `term`>> queries on `rank_features` fields are scored by multiplying the matched
+stored feature value by the provided `boost`.
 
 NOTE: `rank_features` fields only preserve 9 significant bits for the
 precision, which translates to a relative error of about 0.4%.

diff --git a/...ernalClusterTest/java/org/elasticsearch/index/mapper/RankFeaturesMapperIntegrationIT.java b/...ernalClusterTest/java/org/elasticsearch/index/mapper/RankFeaturesMapperIntegrationIT.java
@@ -0,0 +1,116 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.mapper;
+
+import org.elasticsearch.action.bulk.BulkResponse;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.test.ESIntegTestCase;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Map;
+
+import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Integration test for {@code term} queries against a {@code rank_features} field.
+ * <p>
+ * The assertions expect each hit to be scored as the indexed feature value multiplied
+ * by the query boost, and scores of several {@code should} clauses to be summed.
+ */
+public class RankFeaturesMapperIntegrationIT extends ESIntegTestCase {
+
+    private static final String LOWER_RANKED_FEATURE = "ten";
+    private static final String HIGHER_RANKED_FEATURE = "twenty";
+    private static final String INDEX_NAME = "rank_feature_test";
+    private static final String FIELD_NAME = "all_rank_features";
+
+    /**
+     * Loads {@link MapperExtrasPlugin} on the test nodes.
+     */
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return Arrays.asList(MapperExtrasPlugin.class);
+    }
+
+    /**
+     * Runs term queries with and without boosts, a boolean combination of two term
+     * queries, and a term query for a feature that no document contains.
+     */
+    public void testRankFeaturesTermQuery() throws IOException {
+        init();
+        // Default boost: both documents carrying the higher feature score its raw value (20).
+        SearchResponse response = client().prepareSearch(INDEX_NAME)
+            .setQuery(QueryBuilders.termQuery(FIELD_NAME, HIGHER_RANKED_FEATURE))
+            .get();
+        assertThat(response.getHits().getTotalHits().value, equalTo(2L));
+        for (SearchHit hit : response.getHits().getHits()) {
+            assertThat(hit.getScore(), equalTo(20f));
+        }
+
+        // Boost of 100: expected score is 20 * 100.
+        response = client().prepareSearch(INDEX_NAME)
+            .setQuery(QueryBuilders.termQuery(FIELD_NAME, HIGHER_RANKED_FEATURE).boost(100f))
+            .get();
+        assertThat(response.getHits().getTotalHits().value, equalTo(2L));
+        for (SearchHit hit : response.getHits().getHits()) {
+            assertThat(hit.getScore(), equalTo(2000f));
+        }
+
+        // Two optional clauses: higher feature unboosted (20) plus lower feature boosted by 3 (10 * 3).
+        response = client().prepareSearch(INDEX_NAME)
+            .setQuery(
+                QueryBuilders.boolQuery()
+                    .should(QueryBuilders.termQuery(FIELD_NAME, HIGHER_RANKED_FEATURE))
+                    .should(QueryBuilders.termQuery(FIELD_NAME, LOWER_RANKED_FEATURE).boost(3f))
+                    .minimumShouldMatch(1)
+            )
+            .get();
+        assertThat(response.getHits().getTotalHits().value, equalTo(3L));
+        for (SearchHit hit : response.getHits().getHits()) {
+            if (hit.getId().equals("all")) {
+                // Matches both clauses: 20 + 30.
+                assertThat(hit.getScore(), equalTo(50f));
+            }
+            if (hit.getId().equals("lower")) {
+                assertThat(hit.getScore(), equalTo(30f));
+            }
+            if (hit.getId().equals("higher")) {
+                assertThat(hit.getScore(), equalTo(20f));
+            }
+        }
+
+        // A feature that was never indexed matches nothing.
+        response = client().prepareSearch(INDEX_NAME).setQuery(QueryBuilders.termQuery(FIELD_NAME, "missing_feature")).get();
+        assertThat(response.getHits().getTotalHits().value, equalTo(0L));
+    }
+
+    /**
+     * Creates the test index with a single {@code rank_features} field and indexes three
+     * documents: {@code all} (ten=10, twenty=20), {@code lower} (ten=10) and
+     * {@code higher} (twenty=20).
+     */
+    private void init() throws IOException {
+        Settings.Builder settings = Settings.builder();
+        settings.put(indexSettings());
+        prepareCreate(INDEX_NAME).setSettings(settings)
+            .setMapping(
+                jsonBuilder().startObject()
+                    .startObject("_doc")
+                    .startObject("properties")
+                    .startObject(FIELD_NAME)
+                    .field("type", "rank_features")
+                    .endObject()
+                    .endObject()
+                    .endObject()
+                    .endObject()
+            )
+            .get();
+        ensureGreen();
+
+        BulkResponse bulk = client().prepareBulk()
+            .add(
+                client().prepareIndex(INDEX_NAME)
+                    .setId("all")
+                    .setSource(Map.of(FIELD_NAME, Map.of(LOWER_RANKED_FEATURE, 10, HIGHER_RANKED_FEATURE, 20)))
+            )
+            .add(client().prepareIndex(INDEX_NAME).setId("lower").setSource(Map.of(FIELD_NAME, Map.of(LOWER_RANKED_FEATURE, 10))))
+            .add(client().prepareIndex(INDEX_NAME).setId("higher").setSource(Map.of(FIELD_NAME, Map.of(HIGHER_RANKED_FEATURE, 20))))
+            .get();
+        assertFalse(bulk.buildFailureMessage(), bulk.hasFailures());
+        // Refresh so the documents are visible to the searches in the test.
+        assertThat(refresh().getFailedShards(), equalTo(0));
+    }
+
+}
diff --git a/...r-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeaturesFieldMapper.java b/...r-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeaturesFieldMapper.java
@@ -10,6 +10,7 @@
 
 import org.apache.lucene.document.FeatureField;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.fielddata.FieldDataContext;
@@ -27,6 +28,8 @@
 import java.io.IOException;
 import java.util.Map;
 
+import static org.elasticsearch.index.query.AbstractQueryBuilder.DEFAULT_BOOST;
+
 /**
  * A {@link FieldMapper} that exposes Lucene's {@link FeatureField} as a sparse
  * vector of features.
@@ -77,7 +80,7 @@ public static final class RankFeaturesFieldType extends MappedFieldType {
         private final boolean positiveScoreImpact;
 
         public RankFeaturesFieldType(String name, Map<String, String> meta, boolean positiveScoreImpact) {
-            super(name, false, false, false, TextSearchInfo.NONE, meta);
+            // NOTE(review): the first boolean and the TextSearchInfo presumably mark the field as
+            // searchable so that term queries are accepted; confirm against MappedFieldType.
+            super(name, true, false, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
             this.positiveScoreImpact = positiveScoreImpact;
         }
 
@@ -86,10 +89,6 @@ public String typeName() {
             return CONTENT_TYPE;
         }
 
-        public boolean positiveScoreImpact() {
-            return positiveScoreImpact;
-        }
-
         @Override
         public Query existsQuery(SearchExecutionContext context) {
             throw new IllegalArgumentException("[rank_features] fields do not support [exists] queries");
@@ -107,7 +106,14 @@ public ValueFetcher valueFetcher(SearchExecutionContext context, String format)
 
         @Override
         public Query termQuery(Object value, SearchExecutionContext context) {
-            throw new IllegalArgumentException("Queries on [rank_features] fields are not supported");
+            // The term names the feature to match; the query is a linear feature query
+            // on this field, built with DEFAULT_BOOST.
+            final String featureName = indexedValueForSearch(value);
+            return FeatureField.newLinearQuery(name(), featureName, DEFAULT_BOOST);
+        }
+
+        /**
+         * Converts a query term into its string form.
+         *
+         * @param value the term supplied to the query; a {@link BytesRef} is decoded as UTF-8,
+         *              any other object is converted with {@code toString()}
+         * @return the string representation of {@code value}
+         */
+        private static String indexedValueForSearch(Object value) {
+            return value instanceof BytesRef ? ((BytesRef) value).utf8ToString() : value.toString();
         }
     }