From 0a293668e03f935a6f02297b0e39863157a9c973 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 21 Oct 2024 12:22:06 -0400 Subject: [PATCH] ESQL: Fix `REVERSE` with backspace character (#115245) * ESQL: Fix `REVERSE` with backspace character If the text contains a backspace character aka `0x08` aka ctrl-H then we should use the slow reverse path. This is going to be quite rare but our test data is sure good at making rare, fun stuff. Closes #115228 Closes #115227 Closes #114372 --- docs/changelog/115245.yaml | 8 ++++++++ .../expression/function/scalar/string/Reverse.java | 13 ++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) create mode 100644 docs/changelog/115245.yaml diff --git a/docs/changelog/115245.yaml b/docs/changelog/115245.yaml new file mode 100644 index 0000000000000..294328567c3aa --- /dev/null +++ b/docs/changelog/115245.yaml @@ -0,0 +1,8 @@ +pr: 115245 +summary: "ESQL: Fix `REVERSE` with backspace character" +area: ES|QL +type: bug +issues: + - 114372 + - 115227 + - 115228 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Reverse.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Reverse.java index bf4e47d8d0de4..e161566838cd9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Reverse.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Reverse.java @@ -10,7 +10,6 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; import org.elasticsearch.xpack.esql.core.expression.Expression; @@ -79,8 +78,6 @@ protected TypeResolution resolveType() 
{ /** * Reverses a unicode string, keeping grapheme clusters together - * @param str - * @return */ public static String reverseStringWithUnicodeCharacters(String str) { BreakIterator boundary = BreakIterator.getCharacterInstance(Locale.ROOT); @@ -100,10 +97,12 @@ public static String reverseStringWithUnicodeCharacters(String str) { return reversed.toString(); } - private static boolean isOneByteUTF8(BytesRef ref) { + private static boolean reverseBytesIsReverseUnicode(BytesRef ref) { int end = ref.offset + ref.length; for (int i = ref.offset; i < end; i++) { - if (ref.bytes[i] < 0) { + if (ref.bytes[i] < 0 // Anything encoded in multibyte utf-8 + || ref.bytes[i] == 0x28 // Backspace + ) { return false; } } @@ -112,13 +111,13 @@ private static boolean isOneByteUTF8(BytesRef ref) { @Evaluator static BytesRef process(BytesRef val) { - if (isOneByteUTF8(val)) { + if (reverseBytesIsReverseUnicode(val)) { // this is the fast path. we know we can just reverse the bytes. BytesRef reversed = BytesRef.deepCopyOf(val); reverseArray(reversed.bytes, reversed.offset, reversed.length); return reversed; } - return BytesRefs.toBytesRef(reverseStringWithUnicodeCharacters(val.utf8ToString())); + return new BytesRef(reverseStringWithUnicodeCharacters(val.utf8ToString())); } @Override