Skip to content

Commit

Permalink
ESQL: Fix a bug loading unindexed text fields (#104553)
Browse files Browse the repository at this point in the history
This fixes a bug in ESQL where we wouldn't load non-indexed `text`
fields. Those aren't all that common, but they absolutely can appear in
enrich indices. Like, why wouldn't you index a `text` field? It's *for*
indexing. But in an enrich index Elasticsearch controls the mapping. It
just turns off indexing. So you can accidentally get non-indexed `text`
fields.

Anyway! This fixes the bug. The trouble was that we were checking for
norms on the text field whenever the analyzer produced norms. But, oh
no! If the field isn't indexed there won't be norms! Disaster. And, of
course, we weren't checking that. I thought the way we checked whether
the norms existed would also pick up indexing being turned off. It
doesn't. This fixes it.
  • Loading branch information
nik9000 committed Jan 18, 2024
1 parent 4121cf6 commit 796f172
Show file tree
Hide file tree
Showing 8 changed files with 274 additions and 21 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/104553.yaml
@@ -0,0 +1,5 @@
pr: 104553
summary: "ESQL: Fix a bug loading unindexed text fields"
area: ES|QL
type: bug
issues: []
Expand Up @@ -994,10 +994,11 @@ protected String delegatingTo() {
* using whatever
*/
private BlockSourceReader.LeafIteratorLookup blockReaderDisiLookup(BlockLoaderContext blContext) {
if (getTextSearchInfo().hasNorms()) {
return BlockSourceReader.lookupFromNorms(name());
}
if (isIndexed() == false && isStored() == false) {
if (isIndexed()) {
if (getTextSearchInfo().hasNorms()) {
return BlockSourceReader.lookupFromNorms(name());
}
} else if (isStored() == false) {
return BlockSourceReader.lookupMatchingAll();
}
return BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name());
Expand Down
Expand Up @@ -1120,6 +1120,7 @@ protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed)
assumeFalse("ignore_malformed not supported", ignoreMalformed);
boolean storeTextField = randomBoolean();
boolean storedKeywordField = storeTextField || randomBoolean();
boolean indexText = randomBoolean();
String nullValue = storeTextField || usually() ? null : randomAlphaOfLength(2);
Integer ignoreAbove = randomBoolean() ? null : between(10, 100);
KeywordFieldMapperTests.KeywordSyntheticSourceSupport keywordSupport = new KeywordFieldMapperTests.KeywordSyntheticSourceSupport(
Expand All @@ -1137,7 +1138,12 @@ public SyntheticSourceExample example(int maxValues) {
delegate.inputValue(),
delegate.expectedForSyntheticSource(),
delegate.expectedForBlockLoader(),
b -> b.field("type", "text").field("store", true)
b -> {
b.field("type", "text").field("store", true);
if (indexText == false) {
b.field("index", false);
}
}
);
}
// We'll load from _source if ignore_above is defined, otherwise we load from the keyword field.
Expand All @@ -1149,6 +1155,9 @@ public SyntheticSourceExample example(int maxValues) {
delegate.expectedForBlockLoader(),
b -> {
b.field("type", "text");
if (indexText == false) {
b.field("index", false);
}
b.startObject("fields");
{
b.startObject(randomAlphaOfLength(4));
Expand Down
Expand Up @@ -300,9 +300,12 @@ private void doLookup(
new ValuesSourceReaderOperator(
driverContext.blockFactory(),
fields,
List.of(new ValuesSourceReaderOperator.ShardContext(searchContext.searcher().getIndexReader(), () -> {
throw new UnsupportedOperationException("can't load _source as part of enrich");
})),
List.of(
new ValuesSourceReaderOperator.ShardContext(
searchContext.searcher().getIndexReader(),
searchContext::newSourceLoader
)
),
0
)
);
Expand Down
Expand Up @@ -66,6 +66,12 @@ setup:
- { "index": { } }
- { "name": "Denise", "city_id": "sgn" }

---
teardown:
- do:
enrich.delete_policy:
name: cities_policy

---
"Basic":
- do:
Expand Down Expand Up @@ -129,8 +135,3 @@ setup:
- match: { values.1: [ "Bob", "nyc", "USA" ] }
- match: { values.2: [ "Denise", "sgn", null ] }
- match: { values.3: [ "Mario", "rom", "Italy" ] }

- do:
enrich.delete_policy:
name: cities_policy
- is_true: acknowledged
Expand Up @@ -73,6 +73,13 @@ setup:
- { "@timestamp": "2023-06-22", "ip": "10.101.0.107", "message": "network disconnected" }
- { "index": { } }
- { "@timestamp": "2023-06-24", "ip": "13.101.0.114", "message": "authentication failed" }

---
teardown:
- do:
enrich.delete_policy:
name: networks-policy

---
"IP strings":

Expand All @@ -97,8 +104,3 @@ setup:
- match: { values.1: [ [ "10.100.0.21", "10.101.0.107" ], [ "Production", "QA" ], [ "OPS","Engineering" ], "sending messages" ] }
- match: { values.2: [ "10.101.0.107" , "QA", "Engineering", "network disconnected" ] }
- match: { values.3: [ "13.101.0.114" , null, null, "authentication failed" ] }

- do:
enrich.delete_policy:
name: networks-policy
- is_true: acknowledged

Large diffs are not rendered by default.

Expand Up @@ -59,7 +59,11 @@ setup:
type: long
index: false
doc_values: false

text:
type: text
text_noidx:
type: text
index: false

- do:
bulk:
Expand All @@ -83,7 +87,9 @@ setup:
"date": "2021-04-28T18:50:04.467Z",
"date_noidx": "2021-04-28T18:50:04.467Z",
"ip": "192.168.0.1",
"ip_noidx": "192.168.0.1"
"ip_noidx": "192.168.0.1",
"text": "bar",
"text_noidx": "bar"
}

---
Expand All @@ -96,6 +102,7 @@ fetch:
body:
query: 'from test'

- length: { columns: 18 }
- match: { columns.0.name: boolean }
- match: { columns.0.type: boolean }
- match: { columns.1.name: boolean_noidx }
Expand Down Expand Up @@ -128,9 +135,12 @@ fetch:
- match: { columns.14.type: long }
- match: { columns.15.name: long_noidx }
- match: { columns.15.type: long }
- match: { columns.16.name: text }
- match: { columns.16.type: text }
- match: { columns.17.name: text_noidx }
- match: { columns.17.type: text }

- length: { values: 1 }

- match: { values.0.0: true }
- match: { values.0.1: true }
- match: { values.0.2: "2021-04-28T18:50:04.467Z" }
Expand All @@ -147,3 +157,5 @@ fetch:
- match: { values.0.13: "foo" }
- match: { values.0.14: 20 }
- match: { values.0.15: 20 }
- match: { values.0.16: "bar" }
- match: { values.0.17: "bar" }

0 comments on commit 796f172

Please sign in to comment.