Skip to content

Commit

Permalink
ESQL: Fix a bug loading unindexed text fields (#104553)
Browse files Browse the repository at this point in the history
This fixes a bug in ESQL where we wouldn't load non-indexed `text`
fields. Those aren't all that common, but they absolutely can appear in
enrich indices. Like, why wouldn't you index a `text` field? It's *for*
indexing. But in an enrich index Elasticsearch controls the mapping. It
just turns off indexing. So you can accidentally get non-indexed `text`
fields.

Anyway! This fixes the bug. The trouble was that we were checking for
norms on the text field whenever the analyzer produced norms. But, oh
no! If the field isn't indexed there won't be norms! Disaster. And, of
course, we weren't checking that. I thought the way we checked whether
the norms existed would also pick up indexing being turned off. It
doesn't. This fixes it.
  • Loading branch information
nik9000 committed Jan 18, 2024
1 parent 4121cf6 commit 796f172
Show file tree
Hide file tree
Showing 8 changed files with 274 additions and 21 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/104553.yaml
@@ -0,0 +1,5 @@
pr: 104553
summary: "ESQL: Fix a bug loading unindexed text fields"
area: ES|QL
type: bug
issues: []
Expand Up @@ -994,10 +994,11 @@ protected String delegatingTo() {
* using whatever
*/
private BlockSourceReader.LeafIteratorLookup blockReaderDisiLookup(BlockLoaderContext blContext) {
if (getTextSearchInfo().hasNorms()) {
return BlockSourceReader.lookupFromNorms(name());
}
if (isIndexed() == false && isStored() == false) {
if (isIndexed()) {
if (getTextSearchInfo().hasNorms()) {
return BlockSourceReader.lookupFromNorms(name());
}
} else if (isStored() == false) {
return BlockSourceReader.lookupMatchingAll();
}
return BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name());
Expand Down
Expand Up @@ -1120,6 +1120,7 @@ protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed)
assumeFalse("ignore_malformed not supported", ignoreMalformed);
boolean storeTextField = randomBoolean();
boolean storedKeywordField = storeTextField || randomBoolean();
boolean indexText = randomBoolean();
String nullValue = storeTextField || usually() ? null : randomAlphaOfLength(2);
Integer ignoreAbove = randomBoolean() ? null : between(10, 100);
KeywordFieldMapperTests.KeywordSyntheticSourceSupport keywordSupport = new KeywordFieldMapperTests.KeywordSyntheticSourceSupport(
Expand All @@ -1137,7 +1138,12 @@ public SyntheticSourceExample example(int maxValues) {
delegate.inputValue(),
delegate.expectedForSyntheticSource(),
delegate.expectedForBlockLoader(),
b -> b.field("type", "text").field("store", true)
b -> {
b.field("type", "text").field("store", true);
if (indexText == false) {
b.field("index", false);
}
}
);
}
// We'll load from _source if ignore_above is defined, otherwise we load from the keyword field.
Expand All @@ -1149,6 +1155,9 @@ public SyntheticSourceExample example(int maxValues) {
delegate.expectedForBlockLoader(),
b -> {
b.field("type", "text");
if (indexText == false) {
b.field("index", false);
}
b.startObject("fields");
{
b.startObject(randomAlphaOfLength(4));
Expand Down
Expand Up @@ -300,9 +300,12 @@ private void doLookup(
new ValuesSourceReaderOperator(
driverContext.blockFactory(),
fields,
List.of(new ValuesSourceReaderOperator.ShardContext(searchContext.searcher().getIndexReader(), () -> {
throw new UnsupportedOperationException("can't load _source as part of enrich");
})),
List.of(
new ValuesSourceReaderOperator.ShardContext(
searchContext.searcher().getIndexReader(),
searchContext::newSourceLoader
)
),
0
)
);
Expand Down
Expand Up @@ -66,6 +66,12 @@ setup:
- { "index": { } }
- { "name": "Denise", "city_id": "sgn" }

---
teardown:
- do:
enrich.delete_policy:
name: cities_policy

---
"Basic":
- do:
Expand Down Expand Up @@ -129,8 +135,3 @@ setup:
- match: { values.1: [ "Bob", "nyc", "USA" ] }
- match: { values.2: [ "Denise", "sgn", null ] }
- match: { values.3: [ "Mario", "rom", "Italy" ] }

- do:
enrich.delete_policy:
name: cities_policy
- is_true: acknowledged
Expand Up @@ -73,6 +73,13 @@ setup:
- { "@timestamp": "2023-06-22", "ip": "10.101.0.107", "message": "network disconnected" }
- { "index": { } }
- { "@timestamp": "2023-06-24", "ip": "13.101.0.114", "message": "authentication failed" }

---
teardown:
- do:
enrich.delete_policy:
name: networks-policy

---
"IP strings":

Expand All @@ -97,8 +104,3 @@ setup:
- match: { values.1: [ [ "10.100.0.21", "10.101.0.107" ], [ "Production", "QA" ], [ "OPS","Engineering" ], "sending messages" ] }
- match: { values.2: [ "10.101.0.107" , "QA", "Engineering", "network disconnected" ] }
- match: { values.3: [ "13.101.0.114" , null, null, "authentication failed" ] }

- do:
enrich.delete_policy:
name: networks-policy
- is_true: acknowledged

Large diffs are not rendered by default.

Expand Up @@ -59,7 +59,11 @@ setup:
type: long
index: false
doc_values: false

text:
type: text
text_noidx:
type: text
index: false

- do:
bulk:
Expand All @@ -83,7 +87,9 @@ setup:
"date": "2021-04-28T18:50:04.467Z",
"date_noidx": "2021-04-28T18:50:04.467Z",
"ip": "192.168.0.1",
"ip_noidx": "192.168.0.1"
"ip_noidx": "192.168.0.1",
"text": "bar",
"text_noidx": "bar"
}

---
Expand All @@ -96,6 +102,7 @@ fetch:
body:
query: 'from test'

- length: { columns: 18 }
- match: { columns.0.name: boolean }
- match: { columns.0.type: boolean }
- match: { columns.1.name: boolean_noidx }
Expand Down Expand Up @@ -128,9 +135,12 @@ fetch:
- match: { columns.14.type: long }
- match: { columns.15.name: long_noidx }
- match: { columns.15.type: long }
- match: { columns.16.name: text }
- match: { columns.16.type: text }
- match: { columns.17.name: text_noidx }
- match: { columns.17.type: text }

- length: { values: 1 }

- match: { values.0.0: true }
- match: { values.0.1: true }
- match: { values.0.2: "2021-04-28T18:50:04.467Z" }
Expand All @@ -147,3 +157,5 @@ fetch:
- match: { values.0.13: "foo" }
- match: { values.0.14: 20 }
- match: { values.0.15: 20 }
- match: { values.0.16: "bar" }
- match: { values.0.17: "bar" }

0 comments on commit 796f172

Please sign in to comment.