From 0512dc266c6227d820a18c3044655aa248795467 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Fri, 31 Jan 2025 08:49:56 -0500 Subject: [PATCH] Fix bug where intercepted semantic knn queries did not respect filters --- .../xpack/inference/InferenceFeatures.java | 4 +- ...anticKnnVectorQueryRewriteInterceptor.java | 10 ++++- .../test/inference/47_semantic_text_knn.yml | 40 +++++++++++++++++++ 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index 8c2be17777cca..d63e3f773b14b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -15,6 +15,7 @@ import java.util.Set; +import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_FILTER_FIX; import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED; import static org.elasticsearch.xpack.inference.queries.SemanticMatchQueryRewriteInterceptor.SEMANTIC_MATCH_QUERY_REWRITE_INTERCEPTION_SUPPORTED; import static org.elasticsearch.xpack.inference.queries.SemanticSparseVectorQueryRewriteInterceptor.SEMANTIC_SPARSE_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED; @@ -42,7 +43,8 @@ public Set getTestFeatures() { SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED, TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX, SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT, - SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT + SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT, + SEMANTIC_KNN_FILTER_FIX ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java index 57805d5277ffc..9e513a1ed9226 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java @@ -28,6 +28,7 @@ public class SemanticKnnVectorQueryRewriteInterceptor extends SemanticQueryRewri public static final NodeFeature SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED = new NodeFeature( "search.semantic_knn_vector_query_rewrite_interception_supported" ); + public static final NodeFeature SEMANTIC_KNN_FILTER_FIX = new NodeFeature("search.semantic_knn_filter_fix"); public SemanticKnnVectorQueryRewriteInterceptor() {} @@ -147,6 +148,7 @@ private KnnVectorQueryBuilder addIndexFilterToKnnVectorQuery(Collection ); } + copy.addFilterQueries(original.filterQueries()); copy.addFilterQuery(new TermsQueryBuilder(IndexFieldMapper.NAME, indices)); return copy; } @@ -165,8 +167,9 @@ private KnnVectorQueryBuilder buildNewKnnVectorQuery( KnnVectorQueryBuilder original, QueryVectorBuilder queryVectorBuilder ) { + KnnVectorQueryBuilder newQueryBuilder; if (original.queryVectorBuilder() != null) { - return new KnnVectorQueryBuilder( + newQueryBuilder = new KnnVectorQueryBuilder( fieldName, queryVectorBuilder, original.k(), @@ -174,7 +177,7 @@ private KnnVectorQueryBuilder buildNewKnnVectorQuery( original.getVectorSimilarity() ); } else { - return new KnnVectorQueryBuilder( + newQueryBuilder = new KnnVectorQueryBuilder( fieldName, original.queryVector(), original.k(), @@ -183,6 +186,9 @@ private KnnVectorQueryBuilder buildNewKnnVectorQuery( original.getVectorSimilarity() ); } + + newQueryBuilder.addFilterQueries(original.filterQueries()); + return newQueryBuilder; } @Override diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml index dec4e127e501c..64ecb0f2d882c 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml @@ -43,6 +43,8 @@ setup: body: mappings: properties: + keyword_field: + type: keyword inference_field: type: semantic_text inference_id: dense-inference-id @@ -53,6 +55,8 @@ setup: body: mappings: properties: + keyword_field: + type: keyword inference_field: type: semantic_text inference_id: dense-inference-id-2 @@ -63,6 +67,8 @@ setup: body: mappings: properties: + keyword_field: + type: keyword inference_field: type: dense_vector dims: 10 @@ -74,6 +80,8 @@ setup: body: mappings: properties: + keyword_field: + type: keyword inference_field: type: dense_vector dims: 3 @@ -84,6 +92,7 @@ setup: index: test-semantic-text-index id: doc_1 body: + keyword_field: "foo" inference_field: [ "inference test", "another inference test" ] refresh: true @@ -92,6 +101,7 @@ setup: index: test-semantic-text-index-2 id: doc_2 body: + keyword_field: "bar" inference_field: [ "inference test", "another inference test" ] refresh: true @@ -100,6 +110,7 @@ setup: index: test-dense-vector-index id: doc_3 body: + keyword_field: "baz" inference_field: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ] refresh: true @@ -108,6 +119,7 @@ setup: index: test-incompatible-dense-vector-index id: doc_4 body: + keyword_field: "qux" inference_field: [ 1, 2, 3 ] refresh: true @@ -311,6 +323,34 @@ setup: - match: { hits.total.value: 2 } +--- +"knn query respects filters": + - requires: + cluster_features: "search.semantic_knn_filter_fix" + reason: filters fixed in 8.18.0 + + - do: + search: + index: + - test-semantic-text-index + - test-semantic-text-index-2 + body: + query: + knn: + field: inference_field + k: 10 + num_candidates: 100 + query_vector_builder: + text_embedding: + model_text: test + filter: + term: + keyword_field: "foo" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + + --- "knn query against multiple semantic_text fields with multiple inference IDs specified in semantic_text fields with smaller k returns k for each index":