From 613f58c47912e28aa345182c329c4b48e6d569c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Sun, 26 Oct 2025 22:54:16 +0100 Subject: [PATCH 01/30] Function test --- .../esql/qa/testFixtures/src/main/resources/string.csv-spec | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index 4d9ddb83ae301..4fd02cdc9c3c8 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -2748,3 +2748,9 @@ book_no:keyword | author_encoded:keyword | title_encoded:keyword 1463 | J.%20R.%20R.%20Tolkien | Realms%20of%20Tolkien%3A%20Images%20of%20Middle-earth ; +ascii +row a = "eñe\nis a Spanish letter" | eval x = ascii(a); + +a:keyword | x:keyword +eñe\nis a Spanish letter | e\\xf1e\\nis a Spanish letter +; From 492120aec1116252f0d50b587c9c23dab173d8e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Mon, 27 Oct 2025 19:12:21 +0100 Subject: [PATCH 02/30] Add `ascii` to the function registry --- .../xpack/esql/expression/function/EsqlFunctionRegistry.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 2f4d72338b4fc..e8315aba26cfe 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -175,6 +175,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StY; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StYMax; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StYMin; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.Ascii; import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.ByteLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; @@ -397,6 +398,7 @@ private static FunctionDefinition[][] functions() { def(Tau.class, Tau::new, "tau") }, // string new FunctionDefinition[] { + def(Ascii.class, Ascii::new, "ascii"), def(BitLength.class, BitLength::new, "bit_length"), def(ByteLength.class, ByteLength::new, "byte_length"), def(Concat.class, Concat::new, "concat"), From 5365172fdbf42368ded30c18c89b3039e06317f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Mon, 27 Oct 2025 19:12:38 +0100 Subject: [PATCH 03/30] `ascii` function implementation --- .../scalar/string/AsciiEvaluator.java | 146 ++++++++++++++ .../function/scalar/string/Ascii.java | 181 ++++++++++++++++++ 2 files changed, 327 insertions(+) create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiEvaluator.java new file mode 100644 index 0000000000000..7d99c6f42d170 --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiEvaluator.java @@ -0,0 +1,146 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import java.lang.IllegalArgumentException; +import java.lang.Override; +import java.lang.String; +import java.util.function.Function; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Warnings; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Ascii}. + * This class is generated. Edit {@code EvaluatorImplementer} instead. + */ +public final class AsciiEvaluator implements EvalOperator.ExpressionEvaluator { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(AsciiEvaluator.class); + + private final Source source; + + private final BreakingBytesRefBuilder scratch; + + private final EvalOperator.ExpressionEvaluator val; + + private final DriverContext driverContext; + + private Warnings warnings; + + public AsciiEvaluator(Source source, BreakingBytesRefBuilder scratch, + EvalOperator.ExpressionEvaluator val, DriverContext driverContext) { + this.source = source; + this.scratch = scratch; + this.val = val; + this.driverContext = driverContext; + } + + @Override + public Block eval(Page page) { + try (BytesRefBlock valBlock = (BytesRefBlock) val.eval(page)) { + BytesRefVector valVector = valBlock.asVector(); + if (valVector == null) { + return eval(page.getPositionCount(), valBlock); + } + return eval(page.getPositionCount(), valVector).asBlock(); + } + } + + @Override + public long baseRamBytesUsed() { + long baseRamBytesUsed = BASE_RAM_BYTES_USED; + baseRamBytesUsed += val.baseRamBytesUsed(); + return baseRamBytesUsed; + } + + public BytesRefBlock eval(int positionCount, BytesRefBlock valBlock) { + try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { + BytesRef valScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + switch (valBlock.getValueCount(p)) { + case 0: + result.appendNull(); + continue position; + case 1: + break; + default: + warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); + result.appendNull(); + continue position; + } + BytesRef val = valBlock.getBytesRef(valBlock.getFirstValueIndex(p), valScratch); + result.appendBytesRef(Ascii.process(this.scratch, val)); + } + return result.build(); + } + } + + public BytesRefVector eval(int positionCount, BytesRefVector valVector) { + try(BytesRefVector.Builder result = driverContext.blockFactory().newBytesRefVectorBuilder(positionCount)) { + BytesRef valScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + BytesRef val = valVector.getBytesRef(p, valScratch); + result.appendBytesRef(Ascii.process(this.scratch, val)); + } + return result.build(); + } + } + + @Override + public String toString() { + return "AsciiEvaluator[" + "val=" + val + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(scratch, val); + } + + private Warnings warnings() { + if (warnings == null) { + this.warnings = Warnings.createWarnings( + driverContext.warningsMode(), + source.source().getLineNumber(), + source.source().getColumnNumber(), + source.text() + ); + } + return warnings; + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final Function scratch; + + private final EvalOperator.ExpressionEvaluator.Factory val; + + public Factory(Source source, Function scratch, + EvalOperator.ExpressionEvaluator.Factory val) { + this.source = source; + this.scratch = scratch; + this.val = val; + } + + @Override + public AsciiEvaluator get(DriverContext context) { + return new AsciiEvaluator(source, scratch.apply(context), val.get(context), context); + } + + @Override + public String toString() { + return "AsciiEvaluator[" + "val=" + val + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java new file mode 100644 index 0000000000000..aecab2c5fd9bb --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java @@ -0,0 +1,181 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.UnicodeUtil; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.compute.ann.Fixed; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; +import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.expression.function.scalar.UnaryScalarFunction; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static org.elasticsearch.compute.ann.Fixed.Scope.THREAD_LOCAL; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; + +/** + * Escape non ASCII characters + */ +public final class Ascii extends UnaryScalarFunction { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Ascii", Ascii::new); + + @FunctionInfo( + returnType = { "keyword" }, + description = "Escape non ASCII characters.", + examples = @Example(file = "string", tag = "ascii") + ) + public Ascii( + Source source, + @Param( + name = "string", + type = { "keyword", "text" }, + description = "String expression. If `null`, the function returns `null`." + ) Expression str + ) { + super(source, str); + } + + private Ascii(StreamInput in) throws IOException { + super(in); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + @Override + protected TypeResolution resolveType() { + if (childrenResolved() == false) { + return new TypeResolution("Unresolved children"); + } + + return isString(field, sourceText(), TypeResolutions.ParamOrdinal.DEFAULT); + } + + @Override + public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { + var field = toEvaluator.apply(field()); + return new AsciiEvaluator.Factory( + source(), + context -> new BreakingBytesRefBuilder(context.breaker(), "ascii"), + field + ); + } + + @Override + public Expression replaceChildren(List newChildren) { + return new Ascii(source(), newChildren.get(0)); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, Ascii::new, field()); + } + + @Evaluator + static BytesRef process( + @Fixed(includeInToString = false, scope = THREAD_LOCAL) BreakingBytesRefBuilder scratch, + BytesRef val + ) { + UnicodeUtil.UTF8CodePoint codePoint = new UnicodeUtil.UTF8CodePoint(); + + int finalSize = 0; + + int offset = val.offset; + while (offset < val.offset + val.length) { + codePoint = UnicodeUtil.codePointAt(val.bytes, offset, codePoint); + + BytesRef input = new BytesRef(val.bytes, offset, codePoint.numBytes); + var escaped = escapeCodePoint(input, codePoint); + + finalSize += escaped.length; + + offset += codePoint.numBytes; + } + + scratch.grow(finalSize); + scratch.clear(); + + offset = val.offset; + while (offset < val.offset + val.length) { + codePoint = UnicodeUtil.codePointAt(val.bytes, offset, codePoint); + + BytesRef input = new BytesRef(val.bytes, offset, codePoint.numBytes); + var escaped = escapeCodePoint(input, codePoint); + + scratch.append(escaped); + + offset += codePoint.numBytes; + } + + return scratch.bytesRefView(); + } + + /** + * Escapes a Unicode code point similar to Python's ascii() function. + * Returns the input BytesRef for printable ASCII characters that don't need escaping. + */ + private static BytesRef escapeCodePoint(BytesRef input, UnicodeUtil.UTF8CodePoint codePoint) { + var code = codePoint.codePoint; + + // Printable ASCII characters (32-126) don't need escaping + if (code >= 32 && code <= 126) { + return input; + } + + String resultStr = switch (code) { + case '\n' -> "\\\\n"; + case '\r' -> "\\\\r"; + case '\t' -> "\\\\t"; + case '\b' -> "\\\\b"; + case '\f' -> "\\\\f"; + case '\\' -> "\\\\\\"; + case '\'' -> "\\\\'"; + case '\"' -> "\\\\\""; + default -> null; + + // Handle special ASCII control characters + }; + + if (resultStr != null) { + return new BytesRef(resultStr); + } + + // ASCII control characters (0-31, 127) + if (code < 128) { + resultStr = String.format("\\\\x%02x", code); + } else if (code <= 0xFF) { + // Use xHH for code points 128-255 + resultStr = String.format("\\\\x%02x", code); + } else if (code <= 0xFFFF) { + // Use uHHHH for code points 256-65535 + resultStr = String.format("\\\\u%04x", code); + } else { + // Use UHHHHHHHH for code points above 65535 + resultStr = String.format("\\\\U%08x", code); + } + + return new BytesRef(resultStr); + } +} From 4e04a5384f5e39be468d7c3b3c53eafc3df9345d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Mon, 27 Oct 2025 19:15:02 +0100 Subject: [PATCH 04/30] Simplified test case --- .../esql/qa/testFixtures/src/main/resources/string.csv-spec | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index 4fd02cdc9c3c8..27c1e83cb675e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -2749,8 +2749,8 @@ book_no:keyword | author_encoded:keyword | title_encoded:keyword ; ascii -row a = "eñe\nis a Spanish letter" | eval x = ascii(a); +row a = "eñe\nis a Spanish letter" | eval x = ascii(a) | keep x; -a:keyword | x:keyword -eñe\nis a Spanish letter | e\\xf1e\\nis a Spanish letter +x:keyword +e\\xf1e\\nis a Spanish letter ; From 2904ddd3977b28a4271d32d3c3167d97daaf855b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Tue, 28 Oct 2025 17:34:58 +0100 Subject: [PATCH 05/30] Clean-up code imports and add comments. --- .../esql/expression/function/scalar/string/Ascii.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java index aecab2c5fd9bb..d2449a7445d0d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java @@ -25,10 +25,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.UnaryScalarFunction; import java.io.IOException; -import java.util.HashMap; import java.util.List; -import java.util.Map; -import java.util.Optional; import static org.elasticsearch.compute.ann.Fixed.Scope.THREAD_LOCAL; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; @@ -102,6 +99,9 @@ static BytesRef process( int finalSize = 0; + /* A first iteration determines the total grow size. This is used to grow the scratch array + just once which guarantees O(n) as worst case time complexity for the appending operation. + */ int offset = val.offset; while (offset < val.offset + val.length) { codePoint = UnicodeUtil.codePointAt(val.bytes, offset, codePoint); @@ -117,6 +117,7 @@ static BytesRef process( scratch.grow(finalSize); scratch.clear(); + //The second pass fills in the escaped values offset = val.offset; while (offset < val.offset + val.length) { codePoint = UnicodeUtil.codePointAt(val.bytes, offset, codePoint); From df9825dcfd63930c35bc6425e996dc4199286545 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Tue, 28 Oct 2025 21:07:32 +0100 Subject: [PATCH 06/30] Expand the string.csv-spec case --- .../esql/qa/testFixtures/src/main/resources/string.csv-spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index 27c1e83cb675e..f42ef384b7afa 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -2749,8 +2749,8 @@ book_no:keyword | author_encoded:keyword | title_encoded:keyword ; ascii -row a = "eñe\nis a Spanish letter" | eval x = ascii(a) | keep x; +row a = "Hello\n\t 世界! 🌍 Café naïve résumé こんにちは 🎉 中文测试 αβγδε 日本語テスト 🚀🔥💧🪨" | eval x = ascii(a) | keep x; x:keyword -e\\xf1e\\nis a Spanish letter +Hello\\n\\t \\u4e16\\u754c! \\U0001f30d Caf\\xe9 na\\xefve r\\xe9sum\\xe9 \\u3053\\u3093\\u306b\\u3061\\u306f \\U0001f389 \\u4e2d\\u6587\\u6d4b\\u8bd5 \\u03b1\\u03b2\\u03b3\\u03b4\\u03b5 \\u65e5\\u672c\\u8a9e\\u30c6\\u30b9\\u30c8 \\U0001f680\\U0001f525\\U0001f4a7\\U0001faa8 ; From 085034e7c7ab49d47cec5fa9171f7c1090498657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Tue, 28 Oct 2025 21:07:46 +0100 Subject: [PATCH 07/30] Simplify and clean-up code --- .../function/scalar/string/Ascii.java | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java index d2449a7445d0d..e05f8043f7ec3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.util.List; +import java.util.Optional; import static org.elasticsearch.compute.ann.Fixed.Scope.THREAD_LOCAL; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; @@ -41,6 +42,7 @@ public final class Ascii extends UnaryScalarFunction { description = "Escape non ASCII characters.", examples = @Example(file = "string", tag = "ascii") ) + public Ascii( Source source, @Param( @@ -107,9 +109,9 @@ just once which guarantees O(n) as worst case time complexity for the appending codePoint = UnicodeUtil.codePointAt(val.bytes, offset, codePoint); BytesRef input = new BytesRef(val.bytes, offset, codePoint.numBytes); - var escaped = escapeCodePoint(input, codePoint); + var maybeEscaped = escapeCodePoint(codePoint); - finalSize += escaped.length; + finalSize += maybeEscaped.orElse(input).length; offset += codePoint.numBytes; } @@ -123,9 +125,9 @@ just once which guarantees O(n) as worst case time complexity for the appending codePoint = UnicodeUtil.codePointAt(val.bytes, offset, codePoint); BytesRef input = new BytesRef(val.bytes, offset, codePoint.numBytes); - var escaped = escapeCodePoint(input, codePoint); + var maybeEscaped = escapeCodePoint(codePoint); - scratch.append(escaped); + scratch.append(maybeEscaped.orElse(input)); offset += codePoint.numBytes; } @@ -133,16 +135,13 @@ just once which guarantees O(n) as worst case time complexity for the appending return scratch.bytesRefView(); } - /** - * Escapes a Unicode code point similar to Python's ascii() function. - * Returns the input BytesRef for printable ASCII characters that don't need escaping. - */ - private static BytesRef escapeCodePoint(BytesRef input, UnicodeUtil.UTF8CodePoint codePoint) { + + private static Optional escapeCodePoint(UnicodeUtil.UTF8CodePoint codePoint) { var code = codePoint.codePoint; // Printable ASCII characters (32-126) don't need escaping if (code >= 32 && code <= 126) { - return input; + return Optional.empty(); } String resultStr = switch (code) { @@ -160,23 +159,26 @@ private static BytesRef escapeCodePoint(BytesRef input, UnicodeUtil.UTF8CodePoin }; if (resultStr != null) { - return new BytesRef(resultStr); + return Optional.of(new BytesRef(resultStr)); } - // ASCII control characters (0-31, 127) + String formatStr; + if (code < 128) { - resultStr = String.format("\\\\x%02x", code); + formatStr = "\\\\x%02x"; } else if (code <= 0xFF) { // Use xHH for code points 128-255 - resultStr = String.format("\\\\x%02x", code); + formatStr = "\\\\x%02x"; } else if (code <= 0xFFFF) { // Use uHHHH for code points 256-65535 - resultStr = String.format("\\\\u%04x", code); + formatStr = "\\\\u%04x"; } else { // Use UHHHHHHHH for code points above 65535 - resultStr = String.format("\\\\U%08x", code); + formatStr = "\\\\U%08x"; } - return new BytesRef(resultStr); + resultStr = String.format(formatStr, code); + + return Optional.of(new BytesRef(resultStr)); } } From 47e613a5de18e5173c5780d3ba050933fe6ce0f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Tue, 28 Oct 2025 22:12:23 +0100 Subject: [PATCH 08/30] Improve comments --- .../xpack/esql/expression/function/scalar/string/Ascii.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java index e05f8043f7ec3..1c11c037bc85c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java @@ -144,6 +144,7 @@ private static Optional escapeCodePoint(UnicodeUtil.UTF8CodePoint code return Optional.empty(); } + // Handle special ASCII control characters String resultStr = switch (code) { case '\n' -> "\\\\n"; case '\r' -> "\\\\r"; @@ -154,8 +155,6 @@ private static Optional escapeCodePoint(UnicodeUtil.UTF8CodePoint code case '\'' -> "\\\\'"; case '\"' -> "\\\\\""; default -> null; - - // Handle special ASCII control characters }; if (resultStr != null) { From fc7fa49775605e948598716270caefcea6538460 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Tue, 28 Oct 2025 22:12:38 +0100 Subject: [PATCH 09/30] First UT version --- .../function/scalar/string/AsciiTests.java | 170 ++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java new file mode 100644 index 0000000000000..1cad26ac38ab0 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java @@ -0,0 +1,170 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.equalTo; + +public class AsciiTests extends AbstractScalarFunctionTestCase { + public AsciiTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + + List cases = new ArrayList<>(); + + // Test with ASCII printable characters (should not be escaped) + cases.add(new TestCaseSupplier("ASCII printable characters", List.of(DataType.KEYWORD), () -> { + String input = randomAlphaOfLength(between(1, 100)); + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), + "AsciiEvaluator[val=Attribute[channel=0]]", + DataType.KEYWORD, + equalTo(new BytesRef(input)) + ); + })); + + // Test with null input + cases.add(new TestCaseSupplier("null input", List.of(DataType.KEYWORD), () -> { + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(null, DataType.KEYWORD, "str")), + "AsciiEvaluator[val=Attribute[channel=0]]", + DataType.KEYWORD, + null + ); + })); + /* + // Test with Spanish accents + cases.add(new TestCaseSupplier("Spanish accents", List.of(DataType.KEYWORD), () -> { + String input = "Café naïve résumé"; + String expected = "Caf\\xe9 na\\xefve r\\xe9sum\\xe9"; + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), + "AsciiEvaluator[val=Attribute[channel=0]]", + DataType.KEYWORD, + equalTo(new BytesRef(expected)) + ); + })); + + // Test with control characters + cases.add(new TestCaseSupplier("control characters", List.of(DataType.KEYWORD), () -> { + String input = "hello\nworld\r\ttab"; + String expected = "hello\\nworld\\r\\ttab"; + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), + "AsciiEvaluator[val=Attribute[channel=0]]", + DataType.KEYWORD, + equalTo(new BytesRef(expected)) + ); + })); + + // Test with Chinese characters + cases.add(new TestCaseSupplier("Chinese characters", List.of(DataType.KEYWORD), () -> { + String input = "你好世界"; + String expected = "\\u4f60\\u597d\\u4e16\\u754c"; + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), + "AsciiEvaluator[val=Attribute[channel=0]]", + DataType.KEYWORD, + equalTo(new BytesRef(expected)) + ); + })); + + // Test with Japanese characters + cases.add(new TestCaseSupplier("Japanese characters", List.of(DataType.KEYWORD), () -> { + String input = "こんにちは"; + String expected = "\\u3053\\u3093\\u306b\\u3061\\u306f"; + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), + "AsciiEvaluator[val=Attribute[channel=0]]", + DataType.KEYWORD, + equalTo(new BytesRef(expected)) + ); + })); + + // Test with emojis (require 8-digit Unicode escape) + cases.add(new TestCaseSupplier("emojis", List.of(DataType.KEYWORD), () -> { + String input = "🚀🔥💧🪨"; + String expected = "\\U0001f680\\U0001f525\\U0001f327\\U0001faa8"; + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), + "AsciiEvaluator[val=Attribute[channel=0]]", + DataType.KEYWORD, + equalTo(new BytesRef(expected)) + ); + })); + + // Test with Greek letters + cases.add(new TestCaseSupplier("Greek letters", List.of(DataType.KEYWORD), () -> { + String input = "αβγδε"; + String expected = "\\u03b1\\u03b2\\u03b3\\u03b4\\u03b5"; + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), + "AsciiEvaluator[val=Attribute[channel=0]]", + DataType.KEYWORD, + equalTo(new BytesRef(expected)) + ); + })); + + // Test with mixed content + cases.add(new TestCaseSupplier("mixed content", List.of(DataType.KEYWORD), () -> { + String input = "Hello 世界! 🌍"; + String expected = "Hello \\u4e16\\u754c! \\U0001f30d"; + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), + "AsciiEvaluator[val=Attribute[channel=0]]", + DataType.KEYWORD, + equalTo(new BytesRef(expected)) + ); + })); + + // Test with empty string + cases.add(new TestCaseSupplier("empty string", List.of(DataType.KEYWORD), () -> { + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(new BytesRef(""), DataType.KEYWORD, "str")), + "AsciiEvaluator[val=Attribute[channel=0]]", + DataType.KEYWORD, + equalTo(new BytesRef("")) + ); + })); + + // Test with TEXT type + cases.add(new TestCaseSupplier("TEXT type", List.of(DataType.TEXT), () -> { + String input = "Café"; + String expected = "Caf\\xe9"; + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.TEXT, "str")), + "AsciiEvaluator[val=Attribute[channel=0]]", + DataType.KEYWORD, + equalTo(new BytesRef(expected)) + ); + }));*/ + + return parameterSuppliersFromTypedDataWithDefaultChecks(true, cases); + } + + @Override + protected Expression build(Source source, List args) { + return new Ascii(source, args.get(0)); + } +} From e64a5e23044ce4dfffc023a1c52239c4edf96522 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Tue, 28 Oct 2025 22:58:42 +0100 Subject: [PATCH 10/30] Add function to `ScalarFunctionWritables` --- .../expression/function/scalar/ScalarFunctionWritables.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java index 961d577692aa0..2123f8bc61383 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java @@ -36,6 +36,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.math.RoundTo; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Tau; import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.Ascii; import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Contains; @@ -67,6 +68,7 @@ public class ScalarFunctionWritables { public static List getNamedWriteables() { List entries = new ArrayList<>(); entries.add(And.ENTRY); + entries.add(Ascii.ENTRY); entries.add(Atan2.ENTRY); entries.add(BitLength.ENTRY); entries.add(Case.ENTRY); From f07888d0b33b7d5d6ec7417cb6cbabaf1af321c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Tue, 28 Oct 2025 22:58:58 +0100 Subject: [PATCH 11/30] Override function data type --- .../xpack/esql/expression/function/scalar/string/Ascii.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java index 1c11c037bc85c..e4ad2dbc397da 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java @@ -19,6 +19,7 @@ import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; @@ -58,6 +59,11 @@ private Ascii(StreamInput in) throws IOException { super(in); } + @Override + public DataType dataType() { + return DataType.KEYWORD; + } + @Override public String getWriteableName() { return ENTRY.name; From 4065f60b9359881672a36dc4b96b6fe50e91a9ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Tue, 28 Oct 2025 22:59:28 +0100 Subject: [PATCH 12/30] Fix UTs --- .../function/scalar/string/AsciiTests.java | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java index 1cad26ac38ab0..a916fa5d02222 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java @@ -33,8 +33,8 @@ public static Iterable parameters() { List cases = new ArrayList<>(); - // Test with ASCII printable characters (should not be escaped) - cases.add(new TestCaseSupplier("ASCII printable characters", List.of(DataType.KEYWORD), () -> { + // Test with ASCII printable characters (should not be escaped) - KEYWORD + cases.add(new TestCaseSupplier("ASCII printable characters keyword", List.of(DataType.KEYWORD), () -> { String input = randomAlphaOfLength(between(1, 100)); return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), @@ -44,20 +44,21 @@ public static Iterable parameters() { ); })); - // Test with null input - cases.add(new TestCaseSupplier("null input", List.of(DataType.KEYWORD), () -> { + // Test with ASCII printable characters (should not be escaped) - TEXT + cases.add(new TestCaseSupplier("ASCII printable characters text", List.of(DataType.TEXT), () -> { + String input = randomAlphaOfLength(between(1, 100)); return new TestCaseSupplier.TestCase( - List.of(new TestCaseSupplier.TypedData(null, DataType.KEYWORD, "str")), + List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.TEXT, "str")), "AsciiEvaluator[val=Attribute[channel=0]]", DataType.KEYWORD, - null + equalTo(new BytesRef(input)) ); })); - /* + // Test with Spanish accents cases.add(new TestCaseSupplier("Spanish accents", List.of(DataType.KEYWORD), () -> { String input = "Café naïve résumé"; - String expected = "Caf\\xe9 na\\xefve r\\xe9sum\\xe9"; + String expected = "Caf\\\\xe9 na\\\\xefve r\\\\xe9sum\\\\xe9"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), "AsciiEvaluator[val=Attribute[channel=0]]", @@ -69,7 +70,7 @@ public static Iterable parameters() { // Test with control characters cases.add(new TestCaseSupplier("control characters", List.of(DataType.KEYWORD), () -> { String input = "hello\nworld\r\ttab"; - String expected = "hello\\nworld\\r\\ttab"; + String expected = "hello\\\\nworld\\\\r\\\\ttab"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), "AsciiEvaluator[val=Attribute[channel=0]]", @@ -81,7 +82,7 @@ public static Iterable parameters() { // Test with Chinese characters cases.add(new TestCaseSupplier("Chinese characters", List.of(DataType.KEYWORD), () -> { String input = "你好世界"; - String expected = "\\u4f60\\u597d\\u4e16\\u754c"; + String expected = "\\\\u4f60\\\\u597d\\\\u4e16\\\\u754c"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), "AsciiEvaluator[val=Attribute[channel=0]]", @@ -93,7 +94,7 @@ public static Iterable parameters() { // Test with Japanese characters cases.add(new TestCaseSupplier("Japanese characters", List.of(DataType.KEYWORD), () -> { String input = "こんにちは"; - String expected = "\\u3053\\u3093\\u306b\\u3061\\u306f"; + String expected = "\\\\u3053\\\\u3093\\\\u306b\\\\u3061\\\\u306f"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), "AsciiEvaluator[val=Attribute[channel=0]]", @@ -105,7 +106,7 @@ public static Iterable parameters() { // Test with emojis (require 8-digit Unicode escape) cases.add(new TestCaseSupplier("emojis", List.of(DataType.KEYWORD), () -> { String input = "🚀🔥💧🪨"; - String expected = "\\U0001f680\\U0001f525\\U0001f327\\U0001faa8"; + String expected = "\\\\U0001f680\\\\U0001f525\\\\U0001f4a7\\\\U0001faa8"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), "AsciiEvaluator[val=Attribute[channel=0]]", @@ -117,7 +118,7 @@ public static Iterable parameters() { // Test with Greek letters cases.add(new TestCaseSupplier("Greek letters", List.of(DataType.KEYWORD), () -> { String input = "αβγδε"; - String expected = "\\u03b1\\u03b2\\u03b3\\u03b4\\u03b5"; + String expected = "\\\\u03b1\\\\u03b2\\\\u03b3\\\\u03b4\\\\u03b5"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), "AsciiEvaluator[val=Attribute[channel=0]]", @@ -129,7 +130,7 @@ public static Iterable parameters() { // Test with mixed content cases.add(new TestCaseSupplier("mixed content", List.of(DataType.KEYWORD), () -> { String input = "Hello 世界! 🌍"; - String expected = "Hello \\u4e16\\u754c! \\U0001f30d"; + String expected = "Hello \\\\u4e16\\\\u754c! \\\\U0001f30d"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), "AsciiEvaluator[val=Attribute[channel=0]]", @@ -151,14 +152,14 @@ public static Iterable parameters() { // Test with TEXT type cases.add(new TestCaseSupplier("TEXT type", List.of(DataType.TEXT), () -> { String input = "Café"; - String expected = "Caf\\xe9"; + String expected = "Caf\\\\xe9"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.TEXT, "str")), "AsciiEvaluator[val=Attribute[channel=0]]", DataType.KEYWORD, equalTo(new BytesRef(expected)) ); - }));*/ + })); return parameterSuppliersFromTypedDataWithDefaultChecks(true, cases); } From 846397156023c639f50992de9ddbfa1518b0a108 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Tue, 28 Oct 2025 23:04:52 +0100 Subject: [PATCH 13/30] Fix format --- .../expression/function/scalar/string/Ascii.java | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java index e4ad2dbc397da..b9f68de9605f7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java @@ -81,11 +81,7 @@ protected TypeResolution resolveType() { @Override public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { var field = toEvaluator.apply(field()); - return new AsciiEvaluator.Factory( - source(), - context -> new BreakingBytesRefBuilder(context.breaker(), "ascii"), - field - ); + return new AsciiEvaluator.Factory(source(), context -> new BreakingBytesRefBuilder(context.breaker(), "ascii"), field); } @Override @@ -99,10 +95,7 @@ protected NodeInfo info() { } @Evaluator - static BytesRef process( - @Fixed(includeInToString = false, scope = THREAD_LOCAL) BreakingBytesRefBuilder scratch, - BytesRef val - ) { + static BytesRef process(@Fixed(includeInToString = false, scope = THREAD_LOCAL) BreakingBytesRefBuilder scratch, BytesRef val) { UnicodeUtil.UTF8CodePoint codePoint = new UnicodeUtil.UTF8CodePoint(); int finalSize = 0; @@ -125,7 +118,7 @@ just once which guarantees O(n) as worst case time complexity for the appending scratch.grow(finalSize); scratch.clear(); - //The second pass fills in the escaped values + // The second pass fills in the escaped values offset = val.offset; while (offset < val.offset + val.length) { codePoint = UnicodeUtil.codePointAt(val.bytes, offset, codePoint); @@ -141,7 +134,6 @@ just once which guarantees O(n) as worst case time complexity for the appending return scratch.bytesRefView(); } - private static Optional escapeCodePoint(UnicodeUtil.UTF8CodePoint codePoint) { var code = codePoint.codePoint; From be20392b84eebb28e96b5f2a8d6bc0f99fb57e3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Tue, 28 Oct 2025 23:49:06 +0100 Subject: [PATCH 14/30] Docs updates --- .../_snippets/functions/description/ascii.md | 6 +++ .../_snippets/functions/examples/ascii.md | 9 +++++ .../esql/_snippets/functions/layout/ascii.md | 23 ++++++++++++ .../_snippets/functions/parameters/ascii.md | 7 ++++ .../esql/_snippets/functions/types/ascii.md | 9 +++++ .../esql/images/functions/ascii.svg | 1 + .../kibana/definition/functions/ascii.json | 37 +++++++++++++++++++ .../esql/kibana/docs/functions/ascii.md | 8 ++++ 8 files changed, 100 insertions(+) create mode 100644 docs/reference/query-languages/esql/_snippets/functions/description/ascii.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/examples/ascii.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/layout/ascii.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/parameters/ascii.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/types/ascii.md create mode 100644 docs/reference/query-languages/esql/images/functions/ascii.svg create mode 100644 docs/reference/query-languages/esql/kibana/definition/functions/ascii.json create mode 100644 docs/reference/query-languages/esql/kibana/docs/functions/ascii.md diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/ascii.md b/docs/reference/query-languages/esql/_snippets/functions/description/ascii.md new file mode 100644 index 0000000000000..07890b8de106a --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/description/ascii.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Description** + +Escape non ASCII characters. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/ascii.md b/docs/reference/query-languages/esql/_snippets/functions/examples/ascii.md new file mode 100644 index 0000000000000..b3efac50b7ae7 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/ascii.md @@ -0,0 +1,9 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Example** + +```esql +null +``` + + diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/ascii.md b/docs/reference/query-languages/esql/_snippets/functions/layout/ascii.md new file mode 100644 index 0000000000000..28a42eea68680 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/ascii.md @@ -0,0 +1,23 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +## `ASCII` [esql-ascii] + +**Syntax** + +:::{image} ../../../images/functions/ascii.svg +:alt: Embedded +:class: text-center +::: + + +:::{include} ../parameters/ascii.md +::: + +:::{include} ../description/ascii.md +::: + +:::{include} ../types/ascii.md +::: + +:::{include} ../examples/ascii.md +::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/ascii.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/ascii.md new file mode 100644 index 0000000000000..c0b0fc4761d7a --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/ascii.md @@ -0,0 +1,7 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Parameters** + +`string` +: String expression. If `null`, the function returns `null`. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/ascii.md b/docs/reference/query-languages/esql/_snippets/functions/types/ascii.md new file mode 100644 index 0000000000000..7221b9139e2b8 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/types/ascii.md @@ -0,0 +1,9 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Supported types** + +| string | result | +| --- | --- | +| keyword | keyword | +| text | keyword | + diff --git a/docs/reference/query-languages/esql/images/functions/ascii.svg b/docs/reference/query-languages/esql/images/functions/ascii.svg new file mode 100644 index 0000000000000..9fb4a1896fd61 --- /dev/null +++ b/docs/reference/query-languages/esql/images/functions/ascii.svg @@ -0,0 +1 @@ +ASCII(string) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/ascii.json b/docs/reference/query-languages/esql/kibana/definition/functions/ascii.json new file mode 100644 index 0000000000000..15badad9b3904 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/functions/ascii.json @@ -0,0 +1,37 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", + "type" : "scalar", + "name" : "ascii", + "description" : "Escape non ASCII characters.", + "signatures" : [ + { + "params" : [ + { + "name" : "string", + "type" : "keyword", + "optional" : false, + "description" : "String expression. If `null`, the function returns `null`." + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "string", + "type" : "text", + "optional" : false, + "description" : "String expression. If `null`, the function returns `null`." + } + ], + "variadic" : false, + "returnType" : "keyword" + } + ], + "examples" : [ + null + ], + "preview" : false, + "snapshot_only" : false +} diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/ascii.md b/docs/reference/query-languages/esql/kibana/docs/functions/ascii.md new file mode 100644 index 0000000000000..f71dc2728f106 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/docs/functions/ascii.md @@ -0,0 +1,8 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +### ASCII +Escape non ASCII characters. + +```esql +null +``` From baf9dc31833c7225e97a381421e666d25cc1040c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Tue, 28 Oct 2025 23:49:37 +0100 Subject: [PATCH 15/30] Adds ES version from which the function will be supported. --- .../xpack/esql/expression/function/scalar/string/Ascii.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java index b9f68de9605f7..19daebe0e1d0f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java @@ -21,6 +21,8 @@ import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.expression.function.scalar.UnaryScalarFunction; @@ -43,7 +45,7 @@ public final class Ascii extends UnaryScalarFunction { description = "Escape non ASCII characters.", examples = @Example(file = "string", tag = "ascii") ) - + @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.GA, version = "9.2.0") public Ascii( Source source, @Param( From 7bd2adefada519b252f3eb68c2f6be1adca8c00d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Wed, 29 Oct 2025 00:17:26 +0100 Subject: [PATCH 16/30] Add `required_capacity` to ASCII CsvTests --- .../esql/qa/testFixtures/src/main/resources/string.csv-spec | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index f42ef384b7afa..480259846ed5a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -2749,7 +2749,8 @@ book_no:keyword | author_encoded:keyword | title_encoded:keyword ; ascii -row a = "Hello\n\t 世界! 🌍 Café naïve résumé こんにちは 🎉 中文测试 αβγδε 日本語テスト 🚀🔥💧🪨" | eval x = ascii(a) | keep x; +required_capability: ascii +ROW a = "Hello\n\t 世界! 🌍 Café naïve résumé こんにちは 🎉 中文测试 αβγδε 日本語テスト 🚀🔥💧🪨" | EVAL x = ASCII(a) | KEEP x; x:keyword Hello\\n\\t \\u4e16\\u754c! \\U0001f30d Caf\\xe9 na\\xefve r\\xe9sum\\xe9 \\u3053\\u3093\\u306b\\u3061\\u306f \\U0001f389 \\u4e2d\\u6587\\u6d4b\\u8bd5 \\u03b1\\u03b2\\u03b3\\u03b4\\u03b5 \\u65e5\\u672c\\u8a9e\\u30c6\\u30b9\\u30c8 \\U0001f680\\U0001f525\\U0001f4a7\\U0001faa8 From 92b473426a7c46d76ec3aff8c336e79d5bba9b3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Wed, 29 Oct 2025 00:17:42 +0100 Subject: [PATCH 17/30] Fix `appliesTo` annotation --- .../xpack/esql/expression/function/scalar/string/Ascii.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java index 19daebe0e1d0f..a5d698c858e3f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java @@ -43,9 +43,9 @@ public final class Ascii extends UnaryScalarFunction { @FunctionInfo( returnType = { "keyword" }, description = "Escape non ASCII characters.", - examples = @Example(file = "string", tag = "ascii") + examples = @Example(file = "string", tag = "ascii"), + appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.GA, version = "9.2.0") } ) - @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.GA, version = "9.2.0") public Ascii( Source source, @Param( From c02b9455b9495bbe80353b1897c0613281ccb281 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Wed, 29 Oct 2025 00:18:08 +0100 Subject: [PATCH 18/30] Add ASCII to capabilities list --- .../elasticsearch/xpack/esql/action/EsqlCapabilities.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 3acbb4d36899e..8b53023934af3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1529,7 +1529,12 @@ public enum Cap { /** * Fix double release in inline stats when LocalRelation is reused */ - INLINE_STATS_DOUBLE_RELEASE_FIX(INLINESTATS_V11.enabled) + INLINE_STATS_DOUBLE_RELEASE_FIX(INLINESTATS_V11.enabled), + + /** + * Support for string function ASCII + */ + ASCII ; From 46a5dd210c196dea659236cb54c2b5da6e9394e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Wed, 29 Oct 2025 00:18:29 +0100 Subject: [PATCH 19/30] Docs: Reference string in functions listings --- .../query-languages/esql/_snippets/lists/string-functions.md | 1 + .../esql/functions-operators/string-functions.md | 2 ++ 2 files changed, 3 insertions(+) diff --git a/docs/reference/query-languages/esql/_snippets/lists/string-functions.md b/docs/reference/query-languages/esql/_snippets/lists/string-functions.md index ac5f0a327fadb..b447267fe09b1 100644 --- a/docs/reference/query-languages/esql/_snippets/lists/string-functions.md +++ b/docs/reference/query-languages/esql/_snippets/lists/string-functions.md @@ -1,3 +1,4 @@ +* [`ASCII`](../../functions-operators/string-functions.md#ascii) * [`BIT_LENGTH`](../../functions-operators/string-functions.md#esql-bit_length) * [`BYTE_LENGTH`](../../functions-operators/string-functions.md#esql-byte_length) * [`CONCAT`](../../functions-operators/string-functions.md#esql-concat) diff --git a/docs/reference/query-languages/esql/functions-operators/string-functions.md b/docs/reference/query-languages/esql/functions-operators/string-functions.md index 6b887bd13212d..999e6cdc98189 100644 --- a/docs/reference/query-languages/esql/functions-operators/string-functions.md +++ b/docs/reference/query-languages/esql/functions-operators/string-functions.md @@ -14,6 +14,8 @@ mapped_pages: :::{include} ../_snippets/lists/string-functions.md ::: +:::{include} ../_snippets/functions/layout/ascii.md +::: :::{include} ../_snippets/functions/layout/bit_length.md ::: From f9308aa30662334d5c17b7e22a8baae6f834264c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Wed, 29 Oct 2025 00:33:39 +0100 Subject: [PATCH 20/30] Fix docs broken reference --- .../query-languages/esql/_snippets/lists/string-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/query-languages/esql/_snippets/lists/string-functions.md b/docs/reference/query-languages/esql/_snippets/lists/string-functions.md index b447267fe09b1..6bb02604acc97 100644 --- a/docs/reference/query-languages/esql/_snippets/lists/string-functions.md +++ b/docs/reference/query-languages/esql/_snippets/lists/string-functions.md @@ -1,4 +1,4 @@ -* [`ASCII`](../../functions-operators/string-functions.md#ascii) +* [`ASCII`](../../functions-operators/string-functions.md#esql-ascii) * [`BIT_LENGTH`](../../functions-operators/string-functions.md#esql-bit_length) * [`BYTE_LENGTH`](../../functions-operators/string-functions.md#esql-byte_length) * [`CONCAT`](../../functions-operators/string-functions.md#esql-concat) From 400aad1cefa50d9ec09abff4137cb50160e43193 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Wed, 29 Oct 2025 00:52:17 +0100 Subject: [PATCH 21/30] Replace String.format with ES commons Strings.format --- .../xpack/esql/expression/function/scalar/string/Ascii.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java index a5d698c858e3f..08a3dfb8f4ddd 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java @@ -9,6 +9,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.UnicodeUtil; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.ann.Evaluator; @@ -176,7 +177,7 @@ private static Optional escapeCodePoint(UnicodeUtil.UTF8CodePoint code formatStr = "\\\\U%08x"; } - resultStr = String.format(formatStr, code); + resultStr = Strings.format(formatStr, code); return Optional.of(new BytesRef(resultStr)); } From b420a7d3616b83baf7f1849aebef5dcb499a344b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Wed, 29 Oct 2025 00:52:52 +0100 Subject: [PATCH 22/30] Updates on docs generated files --- .../query-languages/esql/_snippets/functions/layout/ascii.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/ascii.md b/docs/reference/query-languages/esql/_snippets/functions/layout/ascii.md index 28a42eea68680..526392dbaa696 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/layout/ascii.md +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/ascii.md @@ -1,6 +1,9 @@ % This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. ## `ASCII` [esql-ascii] +```{applies_to} +stack: ga 9.2.0 +``` **Syntax** From 6f89c535faf7ca918c26c1891f069ce023d5b592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Wed, 29 Oct 2025 01:05:22 +0100 Subject: [PATCH 23/30] Update docs with example --- .../esql/_snippets/functions/examples/ascii.md | 6 +++++- .../esql/kibana/definition/functions/ascii.json | 2 +- .../query-languages/esql/kibana/docs/functions/ascii.md | 2 +- .../esql/qa/testFixtures/src/main/resources/string.csv-spec | 4 ++++ 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/ascii.md b/docs/reference/query-languages/esql/_snippets/functions/examples/ascii.md index b3efac50b7ae7..87c8452016bf7 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/examples/ascii.md +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/ascii.md @@ -3,7 +3,11 @@ **Example** ```esql -null +ROW a = "Hello\\n\\t 世界! 🌍 Café naïve résumé こんにちは 🎉 中文测试 αβγδε 日本語テスト 🚀🔥💧🪨" | EVAL x = ASCII(a) | KEEP x; ``` +| x:keyword | +| --- | +| Hello\\n\\t \\u4e16\\u754c! \\U0001f30d Caf\\xe9 na\\xefve r\\xe9sum\\xe9 \\u3053\\u3093\\u306b\\u3061\\u306f \\U0001f389 \\u4e2d\\u6587\\u6d4b\\u8bd5 \\u03b1\\u03b2\\u03b3\\u03b4\\u03b5 \\u65e5\\u672c\\u8a9e\\u30c6\\u30b9\\u30c8 \\U0001f680\\U0001f525\\U0001f4a7\\U0001faa8 | + diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/ascii.json b/docs/reference/query-languages/esql/kibana/definition/functions/ascii.json index 15badad9b3904..d0a7424b05d65 100644 --- a/docs/reference/query-languages/esql/kibana/definition/functions/ascii.json +++ b/docs/reference/query-languages/esql/kibana/definition/functions/ascii.json @@ -30,7 +30,7 @@ } ], "examples" : [ - null + "ROW a = \"Hello\\\\n\\\\t 世界! \uD83C\uDF0D Café naïve résumé こんにちは \uD83C\uDF89 中文测试 αβγδε 日本語テスト \uD83D\uDE80\uD83D\uDD25\uD83D\uDCA7\uD83E\uDEA8\" | EVAL x = ASCII(a) | KEEP x;" ], "preview" : false, "snapshot_only" : false diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/ascii.md b/docs/reference/query-languages/esql/kibana/docs/functions/ascii.md index f71dc2728f106..56661a4b0229e 100644 --- a/docs/reference/query-languages/esql/kibana/docs/functions/ascii.md +++ b/docs/reference/query-languages/esql/kibana/docs/functions/ascii.md @@ -4,5 +4,5 @@ Escape non ASCII characters. ```esql -null +ROW a = "Hello\\n\\t 世界! 🌍 Café naïve résumé こんにちは 🎉 中文测试 αβγδε 日本語テスト 🚀🔥💧🪨" | EVAL x = ASCII(a) | KEEP x; ``` diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index 480259846ed5a..37471bb283120 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -2750,8 +2750,12 @@ book_no:keyword | author_encoded:keyword | title_encoded:keyword ascii required_capability: ascii +// tag::ascii[] ROW a = "Hello\n\t 世界! 🌍 Café naïve résumé こんにちは 🎉 中文测试 αβγδε 日本語テスト 🚀🔥💧🪨" | EVAL x = ASCII(a) | KEEP x; +// end::ascii[] +// tag::ascii-result[] x:keyword Hello\\n\\t \\u4e16\\u754c! \\U0001f30d Caf\\xe9 na\\xefve r\\xe9sum\\xe9 \\u3053\\u3093\\u306b\\u3061\\u306f \\U0001f389 \\u4e2d\\u6587\\u6d4b\\u8bd5 \\u03b1\\u03b2\\u03b3\\u03b4\\u03b5 \\u65e5\\u672c\\u8a9e\\u30c6\\u30b9\\u30c8 \\U0001f680\\U0001f525\\U0001f4a7\\U0001faa8 +// end::ascii-result[] ; From a6b4096f45f52dd3cc4e027ddb59fe5b2f762fa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Wed, 29 Oct 2025 20:31:49 +0100 Subject: [PATCH 24/30] Remove the first size computation path optimization --- .../function/scalar/string/Ascii.java | 22 +++---------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java index 08a3dfb8f4ddd..1e4e2583829d2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java @@ -101,28 +101,12 @@ protected NodeInfo info() { static BytesRef process(@Fixed(includeInToString = false, scope = THREAD_LOCAL) BreakingBytesRefBuilder scratch, BytesRef val) { UnicodeUtil.UTF8CodePoint codePoint = new UnicodeUtil.UTF8CodePoint(); - int finalSize = 0; - - /* A first iteration determines the total grow size. This is used to grow the scratch array - just once which guarantees O(n) as worst case time complexity for the appending operation. - */ - int offset = val.offset; - while (offset < val.offset + val.length) { - codePoint = UnicodeUtil.codePointAt(val.bytes, offset, codePoint); - - BytesRef input = new BytesRef(val.bytes, offset, codePoint.numBytes); - var maybeEscaped = escapeCodePoint(codePoint); - - finalSize += maybeEscaped.orElse(input).length; - - offset += codePoint.numBytes; - } - - scratch.grow(finalSize); + // Pre-reserve at least as much as the input. + scratch.grow(val.length); scratch.clear(); // The second pass fills in the escaped values - offset = val.offset; + int offset = val.offset; while (offset < val.offset + val.length) { codePoint = UnicodeUtil.codePointAt(val.bytes, offset, codePoint); From e25b8998686ed7a29f03281b392addf928a7b90d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Wed, 29 Oct 2025 21:48:19 +0100 Subject: [PATCH 25/30] Optimize implementation, removing optional return values --- .../function/scalar/string/Ascii.java | 92 +++++++++---------- 1 file changed, 43 insertions(+), 49 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java index 1e4e2583829d2..d3bc856fc1a3d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java @@ -30,7 +30,6 @@ import java.io.IOException; import java.util.List; -import java.util.Optional; import static org.elasticsearch.compute.ann.Fixed.Scope.THREAD_LOCAL; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; @@ -109,60 +108,55 @@ static BytesRef process(@Fixed(includeInToString = false, scope = THREAD_LOCAL) int offset = val.offset; while (offset < val.offset + val.length) { codePoint = UnicodeUtil.codePointAt(val.bytes, offset, codePoint); + var code = codePoint.codePoint; BytesRef input = new BytesRef(val.bytes, offset, codePoint.numBytes); - var maybeEscaped = escapeCodePoint(codePoint); - - scratch.append(maybeEscaped.orElse(input)); + // Bump offset so continue can be used starting from this point offset += codePoint.numBytes; - } - - return scratch.bytesRefView(); - } - - private static Optional escapeCodePoint(UnicodeUtil.UTF8CodePoint codePoint) { - var code = codePoint.codePoint; - // Printable ASCII characters (32-126) don't need escaping - if (code >= 32 && code <= 126) { - return Optional.empty(); + // Check for special ASCII control characters + String escapeStr = switch (code) { + case '\n' -> "\\\\n"; + case '\r' -> "\\\\r"; + case '\t' -> "\\\\t"; + case '\b' -> "\\\\b"; + case '\f' -> "\\\\f"; + case '\\' -> "\\\\\\"; + case '\'' -> "\\\\'"; + case '\"' -> "\\\\\""; + default -> null; + }; + + // Printable ASCII characters (32-126) don't need escaping + if (escapeStr == null && code >= 32 && code <= 126) { + scratch.append(input); + continue; + } + + // For any other, we use escaped templates depending on the range + if (escapeStr == null) { + String formatStr; + + if (code < 128) { + formatStr = "\\\\x%02x"; + } else if (code <= 0xFF) { + // Use xHH for code points 128-255 + formatStr = "\\\\x%02x"; + } else if (code <= 0xFFFF) { + // Use uHHHH for code points 256-65535 + formatStr = "\\\\u%04x"; + } else { + // Use UHHHHHHHH for code points above 65535 + formatStr = "\\\\U%08x"; + } + + escapeStr = Strings.format(formatStr, code); + } + + scratch.append(new BytesRef(escapeStr)); } - // Handle special ASCII control characters - String resultStr = switch (code) { - case '\n' -> "\\\\n"; - case '\r' -> "\\\\r"; - case '\t' -> "\\\\t"; - case '\b' -> "\\\\b"; - case '\f' -> "\\\\f"; - case '\\' -> "\\\\\\"; - case '\'' -> "\\\\'"; - case '\"' -> "\\\\\""; - default -> null; - }; - - if (resultStr != null) { - return Optional.of(new BytesRef(resultStr)); - } - - String formatStr; - - if (code < 128) { - formatStr = "\\\\x%02x"; - } else if (code <= 0xFF) { - // Use xHH for code points 128-255 - formatStr = "\\\\x%02x"; - } else if (code <= 0xFFFF) { - // Use uHHHH for code points 256-65535 - formatStr = "\\\\u%04x"; - } else { - // Use UHHHHHHHH for code points above 65535 - formatStr = "\\\\U%08x"; - } - - resultStr = Strings.format(formatStr, code); - - return Optional.of(new BytesRef(resultStr)); + return scratch.bytesRefView(); } } From b6d4a4acee7394b5989673a179e08070add6c7a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Wed, 29 Oct 2025 21:48:31 +0100 Subject: [PATCH 26/30] Expand test cases --- .../esql/expression/function/scalar/string/AsciiTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java index a916fa5d02222..99dc24c3d9c34 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java @@ -69,8 +69,8 @@ public static Iterable parameters() { // Test with control characters cases.add(new TestCaseSupplier("control characters", List.of(DataType.KEYWORD), () -> { - String input = "hello\nworld\r\ttab"; - String expected = "hello\\\\nworld\\\\r\\\\ttab"; + String input = "hello\nworld\r\ttab\\"; + String expected = "hello\\\\nworld\\\\r\\\\ttab\\\\\\"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), "AsciiEvaluator[val=Attribute[channel=0]]", From 2abde3fcfb23306e655a485d8d2fd1b8e5df49bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Wed, 29 Oct 2025 21:55:58 +0100 Subject: [PATCH 27/30] Add test cases for '\"' --- .../xpack/esql/expression/function/scalar/string/Ascii.java | 2 +- .../esql/expression/function/scalar/string/AsciiTests.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java index d3bc856fc1a3d..4b4269427531f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java @@ -122,7 +122,7 @@ static BytesRef process(@Fixed(includeInToString = false, scope = THREAD_LOCAL) case '\t' -> "\\\\t"; case '\b' -> "\\\\b"; case '\f' -> "\\\\f"; - case '\\' -> "\\\\\\"; + case '\\' -> "\\\\\\\\"; case '\'' -> "\\\\'"; case '\"' -> "\\\\\""; default -> null; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java index 99dc24c3d9c34..4a3a634555568 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java @@ -69,8 +69,8 @@ public static Iterable parameters() { // Test with control characters cases.add(new TestCaseSupplier("control characters", List.of(DataType.KEYWORD), () -> { - String input = "hello\nworld\r\ttab\\"; - String expected = "hello\\\\nworld\\\\r\\\\ttab\\\\\\"; + String input = "hello\nworld\r\t\"tab"; + String expected = "hello\\\\nworld\\\\r\\\\t\\\\\"tab"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), "AsciiEvaluator[val=Attribute[channel=0]]", From 10495f2e31c58848a778f54c755b0cc9946019d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Wed, 29 Oct 2025 22:27:23 +0100 Subject: [PATCH 28/30] Move reusable codepoint to be a fix static value in the evaluator. --- .../function/scalar/string/AsciiEvaluator.java | 17 +++++++++++++---- .../function/scalar/string/Ascii.java | 15 +++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiEvaluator.java index 7d99c6f42d170..48351b419aa00 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiEvaluator.java @@ -10,6 +10,7 @@ import java.util.function.Function; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.UnicodeUtil; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.BytesRefVector; @@ -32,6 +33,8 @@ public final class AsciiEvaluator implements EvalOperator.ExpressionEvaluator { private final BreakingBytesRefBuilder scratch; + private final UnicodeUtil.UTF8CodePoint codePoint; + private final EvalOperator.ExpressionEvaluator val; private final DriverContext driverContext; @@ -39,9 +42,11 @@ public final class AsciiEvaluator implements EvalOperator.ExpressionEvaluator { private Warnings warnings; public AsciiEvaluator(Source source, BreakingBytesRefBuilder scratch, - EvalOperator.ExpressionEvaluator val, DriverContext driverContext) { + UnicodeUtil.UTF8CodePoint codePoint, EvalOperator.ExpressionEvaluator val, + DriverContext driverContext) { this.source = source; this.scratch = scratch; + this.codePoint = codePoint; this.val = val; this.driverContext = driverContext; } @@ -80,7 +85,7 @@ public BytesRefBlock eval(int positionCount, BytesRefBlock valBlock) { continue position; } BytesRef val = valBlock.getBytesRef(valBlock.getFirstValueIndex(p), valScratch); - result.appendBytesRef(Ascii.process(this.scratch, val)); + result.appendBytesRef(Ascii.process(this.scratch, this.codePoint, val)); } return result.build(); } @@ -91,7 +96,7 @@ public BytesRefVector eval(int positionCount, BytesRefVector valVector) { BytesRef valScratch = new BytesRef(); position: for (int p = 0; p < positionCount; p++) { BytesRef val = valVector.getBytesRef(p, valScratch); - result.appendBytesRef(Ascii.process(this.scratch, val)); + result.appendBytesRef(Ascii.process(this.scratch, this.codePoint, val)); } return result.build(); } @@ -124,18 +129,22 @@ static class Factory implements EvalOperator.ExpressionEvaluator.Factory { private final Function scratch; + private final Function codePoint; + private final EvalOperator.ExpressionEvaluator.Factory val; public Factory(Source source, Function scratch, + Function codePoint, EvalOperator.ExpressionEvaluator.Factory val) { this.source = source; this.scratch = scratch; + this.codePoint = codePoint; this.val = val; } @Override public AsciiEvaluator get(DriverContext context) { - return new AsciiEvaluator(source, scratch.apply(context), val.get(context), context); + return new AsciiEvaluator(source, scratch.apply(context), codePoint.apply(context), val.get(context), context); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java index 4b4269427531f..8b08a0f023100 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java @@ -83,7 +83,12 @@ protected TypeResolution resolveType() { @Override public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { var field = toEvaluator.apply(field()); - return new AsciiEvaluator.Factory(source(), context -> new BreakingBytesRefBuilder(context.breaker(), "ascii"), field); + return new AsciiEvaluator.Factory( + source(), + context -> new BreakingBytesRefBuilder(context.breaker(), "ascii"), + context -> new UnicodeUtil.UTF8CodePoint(), + field + ); } @Override @@ -97,9 +102,11 @@ protected NodeInfo info() { } @Evaluator - static BytesRef process(@Fixed(includeInToString = false, scope = THREAD_LOCAL) BreakingBytesRefBuilder scratch, BytesRef val) { - UnicodeUtil.UTF8CodePoint codePoint = new UnicodeUtil.UTF8CodePoint(); - + static BytesRef process( + @Fixed(includeInToString = false, scope = THREAD_LOCAL) BreakingBytesRefBuilder scratch, + @Fixed(includeInToString = false, scope = THREAD_LOCAL) UnicodeUtil.UTF8CodePoint codePoint, + BytesRef val + ) { // Pre-reserve at least as much as the input. scratch.grow(val.length); scratch.clear(); From f755162846ad2f33ffb40559505bbe678364485e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Thu, 30 Oct 2025 12:46:52 +0100 Subject: [PATCH 29/30] Rename function to `TO_ASCII` --- .../description/{ascii.md => to_ascii.md} | 0 .../examples/{ascii.md => to_ascii.md} | 2 +- .../esql/_snippets/functions/layout/ascii.md | 26 ----------------- .../_snippets/functions/layout/to_ascii.md | 26 +++++++++++++++++ .../parameters/{ascii.md => to_ascii.md} | 0 .../functions/types/{ascii.md => to_ascii.md} | 0 .../esql/_snippets/lists/string-functions.md | 2 +- .../functions-operators/string-functions.md | 2 +- .../esql/images/functions/ascii.svg | 1 - .../esql/images/functions/to_ascii.svg | 1 + .../functions/{ascii.json => to_ascii.json} | 4 +-- .../docs/functions/{ascii.md => to_ascii.md} | 4 +-- .../src/main/resources/string.csv-spec | 14 +++++----- ...iiEvaluator.java => ToAsciiEvaluator.java} | 20 ++++++------- .../xpack/esql/action/EsqlCapabilities.java | 4 +-- .../function/EsqlFunctionRegistry.java | 4 +-- .../scalar/ScalarFunctionWritables.java | 4 +-- .../string/{Ascii.java => ToAscii.java} | 18 ++++++------ .../{AsciiTests.java => ToAsciiTests.java} | 28 +++++++++---------- 19 files changed, 80 insertions(+), 80 deletions(-) rename docs/reference/query-languages/esql/_snippets/functions/description/{ascii.md => to_ascii.md} (100%) rename docs/reference/query-languages/esql/_snippets/functions/examples/{ascii.md => to_ascii.md} (93%) delete mode 100644 docs/reference/query-languages/esql/_snippets/functions/layout/ascii.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/layout/to_ascii.md rename docs/reference/query-languages/esql/_snippets/functions/parameters/{ascii.md => to_ascii.md} (100%) rename docs/reference/query-languages/esql/_snippets/functions/types/{ascii.md => to_ascii.md} (100%) delete mode 100644 docs/reference/query-languages/esql/images/functions/ascii.svg create mode 100644 docs/reference/query-languages/esql/images/functions/to_ascii.svg rename docs/reference/query-languages/esql/kibana/definition/functions/{ascii.json => to_ascii.json} (94%) rename docs/reference/query-languages/esql/kibana/docs/functions/{ascii.md => to_ascii.md} (83%) rename x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/{AsciiEvaluator.java => ToAsciiEvaluator.java} (87%) rename x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/{Ascii.java => ToAscii.java} (92%) rename x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/{AsciiTests.java => ToAsciiTests.java} (88%) diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/ascii.md b/docs/reference/query-languages/esql/_snippets/functions/description/to_ascii.md similarity index 100% rename from docs/reference/query-languages/esql/_snippets/functions/description/ascii.md rename to docs/reference/query-languages/esql/_snippets/functions/description/to_ascii.md diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/ascii.md b/docs/reference/query-languages/esql/_snippets/functions/examples/to_ascii.md similarity index 93% rename from docs/reference/query-languages/esql/_snippets/functions/examples/ascii.md rename to docs/reference/query-languages/esql/_snippets/functions/examples/to_ascii.md index 87c8452016bf7..e801695cb4421 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/examples/ascii.md +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/to_ascii.md @@ -3,7 +3,7 @@ **Example** ```esql -ROW a = "Hello\\n\\t 世界! 🌍 Café naïve résumé こんにちは 🎉 中文测试 αβγδε 日本語テスト 🚀🔥💧🪨" | EVAL x = ASCII(a) | KEEP x; +ROW a = "Hello\\n\\t 世界! 🌍 Café naïve résumé こんにちは 🎉 中文测试 αβγδε 日本語テスト 🚀🔥💧🪨" | EVAL x = TO_ASCII(a) | KEEP x; ``` | x:keyword | diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/ascii.md b/docs/reference/query-languages/esql/_snippets/functions/layout/ascii.md deleted file mode 100644 index 526392dbaa696..0000000000000 --- a/docs/reference/query-languages/esql/_snippets/functions/layout/ascii.md +++ /dev/null @@ -1,26 +0,0 @@ -% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. - -## `ASCII` [esql-ascii] -```{applies_to} -stack: ga 9.2.0 -``` - -**Syntax** - -:::{image} ../../../images/functions/ascii.svg -:alt: Embedded -:class: text-center -::: - - -:::{include} ../parameters/ascii.md -::: - -:::{include} ../description/ascii.md -::: - -:::{include} ../types/ascii.md -::: - -:::{include} ../examples/ascii.md -::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/to_ascii.md b/docs/reference/query-languages/esql/_snippets/functions/layout/to_ascii.md new file mode 100644 index 0000000000000..aa08b9f17dc23 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/to_ascii.md @@ -0,0 +1,26 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +## `TO_ASCII` [esql-to_ascii] +```{applies_to} +stack: ga 9.2.0 +``` + +**Syntax** + +:::{image} ../../../images/functions/to_ascii.svg +:alt: Embedded +:class: text-center +::: + + +:::{include} ../parameters/to_ascii.md +::: + +:::{include} ../description/to_ascii.md +::: + +:::{include} ../types/to_ascii.md +::: + +:::{include} ../examples/to_ascii.md +::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/ascii.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/to_ascii.md similarity index 100% rename from docs/reference/query-languages/esql/_snippets/functions/parameters/ascii.md rename to docs/reference/query-languages/esql/_snippets/functions/parameters/to_ascii.md diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/ascii.md b/docs/reference/query-languages/esql/_snippets/functions/types/to_ascii.md similarity index 100% rename from docs/reference/query-languages/esql/_snippets/functions/types/ascii.md rename to docs/reference/query-languages/esql/_snippets/functions/types/to_ascii.md diff --git a/docs/reference/query-languages/esql/_snippets/lists/string-functions.md b/docs/reference/query-languages/esql/_snippets/lists/string-functions.md index 6bb02604acc97..7bec3b758f9f4 100644 --- a/docs/reference/query-languages/esql/_snippets/lists/string-functions.md +++ b/docs/reference/query-languages/esql/_snippets/lists/string-functions.md @@ -1,4 +1,4 @@ -* [`ASCII`](../../functions-operators/string-functions.md#esql-ascii) +* [`TO_ASCII`](../../functions-operators/string-functions.md#esql-to_ascii) * [`BIT_LENGTH`](../../functions-operators/string-functions.md#esql-bit_length) * [`BYTE_LENGTH`](../../functions-operators/string-functions.md#esql-byte_length) * [`CONCAT`](../../functions-operators/string-functions.md#esql-concat) diff --git a/docs/reference/query-languages/esql/functions-operators/string-functions.md b/docs/reference/query-languages/esql/functions-operators/string-functions.md index 999e6cdc98189..309eacb50e30b 100644 --- a/docs/reference/query-languages/esql/functions-operators/string-functions.md +++ b/docs/reference/query-languages/esql/functions-operators/string-functions.md @@ -14,7 +14,7 @@ mapped_pages: :::{include} ../_snippets/lists/string-functions.md ::: -:::{include} ../_snippets/functions/layout/ascii.md +:::{include} ../_snippets/functions/layout/to_ascii.md ::: :::{include} ../_snippets/functions/layout/bit_length.md diff --git a/docs/reference/query-languages/esql/images/functions/ascii.svg b/docs/reference/query-languages/esql/images/functions/ascii.svg deleted file mode 100644 index 9fb4a1896fd61..0000000000000 --- a/docs/reference/query-languages/esql/images/functions/ascii.svg +++ /dev/null @@ -1 +0,0 @@ -ASCII(string) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/images/functions/to_ascii.svg b/docs/reference/query-languages/esql/images/functions/to_ascii.svg new file mode 100644 index 0000000000000..c7f20de3c671f --- /dev/null +++ b/docs/reference/query-languages/esql/images/functions/to_ascii.svg @@ -0,0 +1 @@ +TO_ASCII(string) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/ascii.json b/docs/reference/query-languages/esql/kibana/definition/functions/to_ascii.json similarity index 94% rename from docs/reference/query-languages/esql/kibana/definition/functions/ascii.json rename to docs/reference/query-languages/esql/kibana/definition/functions/to_ascii.json index d0a7424b05d65..c1140fd00b68c 100644 --- a/docs/reference/query-languages/esql/kibana/definition/functions/ascii.json +++ b/docs/reference/query-languages/esql/kibana/definition/functions/to_ascii.json @@ -1,7 +1,7 @@ { "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", "type" : "scalar", - "name" : "ascii", + "name" : "to_ascii", "description" : "Escape non ASCII characters.", "signatures" : [ { @@ -30,7 +30,7 @@ } ], "examples" : [ - "ROW a = \"Hello\\\\n\\\\t 世界! \uD83C\uDF0D Café naïve résumé こんにちは \uD83C\uDF89 中文测试 αβγδε 日本語テスト \uD83D\uDE80\uD83D\uDD25\uD83D\uDCA7\uD83E\uDEA8\" | EVAL x = ASCII(a) | KEEP x;" + "ROW a = \"Hello\\\\n\\\\t 世界! \uD83C\uDF0D Café naïve résumé こんにちは \uD83C\uDF89 中文测试 αβγδε 日本語テスト \uD83D\uDE80\uD83D\uDD25\uD83D\uDCA7\uD83E\uDEA8\" | EVAL x = TO_ASCII(a) | KEEP x;" ], "preview" : false, "snapshot_only" : false diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/ascii.md b/docs/reference/query-languages/esql/kibana/docs/functions/to_ascii.md similarity index 83% rename from docs/reference/query-languages/esql/kibana/docs/functions/ascii.md rename to docs/reference/query-languages/esql/kibana/docs/functions/to_ascii.md index 56661a4b0229e..a12aa975ba128 100644 --- a/docs/reference/query-languages/esql/kibana/docs/functions/ascii.md +++ b/docs/reference/query-languages/esql/kibana/docs/functions/to_ascii.md @@ -1,8 +1,8 @@ % This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. -### ASCII +### TO ASCII Escape non ASCII characters. ```esql -ROW a = "Hello\\n\\t 世界! 🌍 Café naïve résumé こんにちは 🎉 中文测试 αβγδε 日本語テスト 🚀🔥💧🪨" | EVAL x = ASCII(a) | KEEP x; +ROW a = "Hello\\n\\t 世界! 🌍 Café naïve résumé こんにちは 🎉 中文测试 αβγδε 日本語テスト 🚀🔥💧🪨" | EVAL x = TO_ASCII(a) | KEEP x; ``` diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index 37471bb283120..239f6648e6242 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -2748,14 +2748,14 @@ book_no:keyword | author_encoded:keyword | title_encoded:keyword 1463 | J.%20R.%20R.%20Tolkien | Realms%20of%20Tolkien%3A%20Images%20of%20Middle-earth ; -ascii -required_capability: ascii -// tag::ascii[] -ROW a = "Hello\n\t 世界! 🌍 Café naïve résumé こんにちは 🎉 中文测试 αβγδε 日本語テスト 🚀🔥💧🪨" | EVAL x = ASCII(a) | KEEP x; -// end::ascii[] +to_ascii +required_capability: to_ascii +// tag::to_ascii[] +ROW a = "Hello\n\t 世界! 🌍 Café naïve résumé こんにちは 🎉 中文测试 αβγδε 日本語テスト 🚀🔥💧🪨" | EVAL x = TO_ASCII(a) | KEEP x; +// end::to_ascii[] -// tag::ascii-result[] +// tag::to_ascii-result[] x:keyword Hello\\n\\t \\u4e16\\u754c! \\U0001f30d Caf\\xe9 na\\xefve r\\xe9sum\\xe9 \\u3053\\u3093\\u306b\\u3061\\u306f \\U0001f389 \\u4e2d\\u6587\\u6d4b\\u8bd5 \\u03b1\\u03b2\\u03b3\\u03b4\\u03b5 \\u65e5\\u672c\\u8a9e\\u30c6\\u30b9\\u30c8 \\U0001f680\\U0001f525\\U0001f4a7\\U0001faa8 -// end::ascii-result[] +// end::to_ascii-result[] ; diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAsciiEvaluator.java similarity index 87% rename from x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiEvaluator.java rename to x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAsciiEvaluator.java index 48351b419aa00..82e1f7ef74112 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAsciiEvaluator.java @@ -23,11 +23,11 @@ import org.elasticsearch.xpack.esql.core.tree.Source; /** - * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Ascii}. + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ToAscii}. * This class is generated. Edit {@code EvaluatorImplementer} instead. */ -public final class AsciiEvaluator implements EvalOperator.ExpressionEvaluator { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(AsciiEvaluator.class); +public final class ToAsciiEvaluator implements EvalOperator.ExpressionEvaluator { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ToAsciiEvaluator.class); private final Source source; @@ -41,7 +41,7 @@ public final class AsciiEvaluator implements EvalOperator.ExpressionEvaluator { private Warnings warnings; - public AsciiEvaluator(Source source, BreakingBytesRefBuilder scratch, + public ToAsciiEvaluator(Source source, BreakingBytesRefBuilder scratch, UnicodeUtil.UTF8CodePoint codePoint, EvalOperator.ExpressionEvaluator val, DriverContext driverContext) { this.source = source; @@ -85,7 +85,7 @@ public BytesRefBlock eval(int positionCount, BytesRefBlock valBlock) { continue position; } BytesRef val = valBlock.getBytesRef(valBlock.getFirstValueIndex(p), valScratch); - result.appendBytesRef(Ascii.process(this.scratch, this.codePoint, val)); + result.appendBytesRef(ToAscii.process(this.scratch, this.codePoint, val)); } return result.build(); } @@ -96,7 +96,7 @@ public BytesRefVector eval(int positionCount, BytesRefVector valVector) { BytesRef valScratch = new BytesRef(); position: for (int p = 0; p < positionCount; p++) { BytesRef val = valVector.getBytesRef(p, valScratch); - result.appendBytesRef(Ascii.process(this.scratch, this.codePoint, val)); + result.appendBytesRef(ToAscii.process(this.scratch, this.codePoint, val)); } return result.build(); } @@ -104,7 +104,7 @@ public BytesRefVector eval(int positionCount, BytesRefVector valVector) { @Override public String toString() { - return "AsciiEvaluator[" + "val=" + val + "]"; + return "ToAsciiEvaluator[" + "val=" + val + "]"; } @Override @@ -143,13 +143,13 @@ public Factory(Source source, Function s } @Override - public AsciiEvaluator get(DriverContext context) { - return new AsciiEvaluator(source, scratch.apply(context), codePoint.apply(context), val.get(context), context); + public ToAsciiEvaluator get(DriverContext context) { + return new ToAsciiEvaluator(source, scratch.apply(context), codePoint.apply(context), val.get(context), context); } @Override public String toString() { - return "AsciiEvaluator[" + "val=" + val + "]"; + return "ToAsciiEvaluator[" + "val=" + val + "]"; } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 8b53023934af3..928cfbba97c71 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1532,9 +1532,9 @@ public enum Cap { INLINE_STATS_DOUBLE_RELEASE_FIX(INLINESTATS_V11.enabled), /** - * Support for string function ASCII + * Support for string function TO_ASCII */ - ASCII + TO_ASCII ; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index e8315aba26cfe..e354232965e49 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -175,7 +175,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StY; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StYMax; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StYMin; -import org.elasticsearch.xpack.esql.expression.function.scalar.string.Ascii; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToAscii; import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.ByteLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; @@ -398,7 +398,7 @@ private static FunctionDefinition[][] functions() { def(Tau.class, Tau::new, "tau") }, // string new FunctionDefinition[] { - def(Ascii.class, Ascii::new, "ascii"), + def(ToAscii.class, ToAscii::new, "to_ascii"), def(BitLength.class, BitLength::new, "bit_length"), def(ByteLength.class, ByteLength::new, "byte_length"), def(Concat.class, Concat::new, "concat"), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java index 2123f8bc61383..978e9f64ace7e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java @@ -36,7 +36,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.math.RoundTo; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Tau; import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce; -import org.elasticsearch.xpack.esql.expression.function.scalar.string.Ascii; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToAscii; import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Contains; @@ -68,7 +68,7 @@ public class ScalarFunctionWritables { public static List getNamedWriteables() { List entries = new ArrayList<>(); entries.add(And.ENTRY); - entries.add(Ascii.ENTRY); + entries.add(ToAscii.ENTRY); entries.add(Atan2.ENTRY); entries.add(BitLength.ENTRY); entries.add(Case.ENTRY); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAscii.java similarity index 92% rename from x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAscii.java index 8b08a0f023100..5c555872ad682 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Ascii.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAscii.java @@ -37,16 +37,16 @@ /** * Escape non ASCII characters */ -public final class Ascii extends UnaryScalarFunction { - public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Ascii", Ascii::new); +public final class ToAscii extends UnaryScalarFunction { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "ToAscii", ToAscii::new); @FunctionInfo( returnType = { "keyword" }, description = "Escape non ASCII characters.", - examples = @Example(file = "string", tag = "ascii"), + examples = @Example(file = "string", tag = "to_ascii"), appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.GA, version = "9.2.0") } ) - public Ascii( + public ToAscii( Source source, @Param( name = "string", @@ -57,7 +57,7 @@ public Ascii( super(source, str); } - private Ascii(StreamInput in) throws IOException { + private ToAscii(StreamInput in) throws IOException { super(in); } @@ -83,9 +83,9 @@ protected TypeResolution resolveType() { @Override public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { var field = toEvaluator.apply(field()); - return new AsciiEvaluator.Factory( + return new ToAsciiEvaluator.Factory( source(), - context -> new BreakingBytesRefBuilder(context.breaker(), "ascii"), + context -> new BreakingBytesRefBuilder(context.breaker(), "to_ascii"), context -> new UnicodeUtil.UTF8CodePoint(), field ); @@ -93,12 +93,12 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { @Override public Expression replaceChildren(List newChildren) { - return new Ascii(source(), newChildren.get(0)); + return new ToAscii(source(), newChildren.get(0)); } @Override protected NodeInfo info() { - return NodeInfo.create(this, Ascii::new, field()); + return NodeInfo.create(this, ToAscii::new, field()); } @Evaluator diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAsciiTests.java similarity index 88% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java rename to x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAsciiTests.java index 4a3a634555568..65bf77ab846c0 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AsciiTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAsciiTests.java @@ -23,8 +23,8 @@ import static org.hamcrest.Matchers.equalTo; -public class AsciiTests extends AbstractScalarFunctionTestCase { - public AsciiTests(@Name("TestCase") Supplier testCaseSupplier) { +public class ToAsciiTests extends AbstractScalarFunctionTestCase { + public ToAsciiTests(@Name("TestCase") Supplier testCaseSupplier) { this.testCase = testCaseSupplier.get(); } @@ -38,7 +38,7 @@ public static Iterable parameters() { String input = randomAlphaOfLength(between(1, 100)); return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), - "AsciiEvaluator[val=Attribute[channel=0]]", + "ToAsciiEvaluator[val=Attribute[channel=0]]", DataType.KEYWORD, equalTo(new BytesRef(input)) ); @@ -49,7 +49,7 @@ public static Iterable parameters() { String input = randomAlphaOfLength(between(1, 100)); return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.TEXT, "str")), - "AsciiEvaluator[val=Attribute[channel=0]]", + "ToAsciiEvaluator[val=Attribute[channel=0]]", DataType.KEYWORD, equalTo(new BytesRef(input)) ); @@ -61,7 +61,7 @@ public static Iterable parameters() { String expected = "Caf\\\\xe9 na\\\\xefve r\\\\xe9sum\\\\xe9"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), - "AsciiEvaluator[val=Attribute[channel=0]]", + "ToAsciiEvaluator[val=Attribute[channel=0]]", DataType.KEYWORD, equalTo(new BytesRef(expected)) ); @@ -73,7 +73,7 @@ public static Iterable parameters() { String expected = "hello\\\\nworld\\\\r\\\\t\\\\\"tab"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), - "AsciiEvaluator[val=Attribute[channel=0]]", + "ToAsciiEvaluator[val=Attribute[channel=0]]", DataType.KEYWORD, equalTo(new BytesRef(expected)) ); @@ -85,7 +85,7 @@ public static Iterable parameters() { String expected = "\\\\u4f60\\\\u597d\\\\u4e16\\\\u754c"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), - "AsciiEvaluator[val=Attribute[channel=0]]", + "ToAsciiEvaluator[val=Attribute[channel=0]]", DataType.KEYWORD, equalTo(new BytesRef(expected)) ); @@ -97,7 +97,7 @@ public static Iterable parameters() { String expected = "\\\\u3053\\\\u3093\\\\u306b\\\\u3061\\\\u306f"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), - "AsciiEvaluator[val=Attribute[channel=0]]", + "ToAsciiEvaluator[val=Attribute[channel=0]]", DataType.KEYWORD, equalTo(new BytesRef(expected)) ); @@ -109,7 +109,7 @@ public static Iterable parameters() { String expected = "\\\\U0001f680\\\\U0001f525\\\\U0001f4a7\\\\U0001faa8"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), - "AsciiEvaluator[val=Attribute[channel=0]]", + "ToAsciiEvaluator[val=Attribute[channel=0]]", DataType.KEYWORD, equalTo(new BytesRef(expected)) ); @@ -121,7 +121,7 @@ public static Iterable parameters() { String expected = "\\\\u03b1\\\\u03b2\\\\u03b3\\\\u03b4\\\\u03b5"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), - "AsciiEvaluator[val=Attribute[channel=0]]", + "ToAsciiEvaluator[val=Attribute[channel=0]]", DataType.KEYWORD, equalTo(new BytesRef(expected)) ); @@ -133,7 +133,7 @@ public static Iterable parameters() { String expected = "Hello \\\\u4e16\\\\u754c! \\\\U0001f30d"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "str")), - "AsciiEvaluator[val=Attribute[channel=0]]", + "ToAsciiEvaluator[val=Attribute[channel=0]]", DataType.KEYWORD, equalTo(new BytesRef(expected)) ); @@ -143,7 +143,7 @@ public static Iterable parameters() { cases.add(new TestCaseSupplier("empty string", List.of(DataType.KEYWORD), () -> { return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(""), DataType.KEYWORD, "str")), - "AsciiEvaluator[val=Attribute[channel=0]]", + "ToAsciiEvaluator[val=Attribute[channel=0]]", DataType.KEYWORD, equalTo(new BytesRef("")) ); @@ -155,7 +155,7 @@ public static Iterable parameters() { String expected = "Caf\\\\xe9"; return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(input), DataType.TEXT, "str")), - "AsciiEvaluator[val=Attribute[channel=0]]", + "ToAsciiEvaluator[val=Attribute[channel=0]]", DataType.KEYWORD, equalTo(new BytesRef(expected)) ); @@ -166,6 +166,6 @@ public static Iterable parameters() { @Override protected Expression build(Source source, List args) { - return new Ascii(source, args.get(0)); + return new ToAscii(source, args.get(0)); } } From be5beefa2a699bff708a14b10c2a58da9bc0ebb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fco=2E=20P=C3=A9rez=20Hidalgo?= Date: Wed, 5 Nov 2025 11:35:25 +0100 Subject: [PATCH 30/30] Use `@ConvertEvaluator` instead of `@Evaluator` to allow processing multivalued inputs --- .../scalar/string/ToAsciiEvaluator.java | 159 ++++++++++-------- .../function/EsqlFunctionRegistry.java | 2 +- .../scalar/ScalarFunctionWritables.java | 2 +- .../function/scalar/string/ToAscii.java | 12 +- 4 files changed, 96 insertions(+), 79 deletions(-) diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAsciiEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAsciiEvaluator.java index 82e1f7ef74112..1a89a0ab38bcb 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAsciiEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAsciiEvaluator.java @@ -4,7 +4,6 @@ // 2.0. package org.elasticsearch.xpack.esql.expression.function.scalar.string; -import java.lang.IllegalArgumentException; import java.lang.Override; import java.lang.String; import java.util.function.Function; @@ -14,91 +13,114 @@ import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.BytesRefVector; -import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.data.OrdinalBytesRefVector; +import org.elasticsearch.compute.data.Vector; import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.EvalOperator; -import org.elasticsearch.compute.operator.Warnings; import org.elasticsearch.core.Releasables; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction; /** * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ToAscii}. - * This class is generated. Edit {@code EvaluatorImplementer} instead. + * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead. */ -public final class ToAsciiEvaluator implements EvalOperator.ExpressionEvaluator { +public final class ToAsciiEvaluator extends AbstractConvertFunction.AbstractEvaluator { private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ToAsciiEvaluator.class); - private final Source source; + private final EvalOperator.ExpressionEvaluator val; private final BreakingBytesRefBuilder scratch; private final UnicodeUtil.UTF8CodePoint codePoint; - private final EvalOperator.ExpressionEvaluator val; - - private final DriverContext driverContext; - - private Warnings warnings; - - public ToAsciiEvaluator(Source source, BreakingBytesRefBuilder scratch, - UnicodeUtil.UTF8CodePoint codePoint, EvalOperator.ExpressionEvaluator val, + public ToAsciiEvaluator(Source source, EvalOperator.ExpressionEvaluator val, + BreakingBytesRefBuilder scratch, UnicodeUtil.UTF8CodePoint codePoint, DriverContext driverContext) { - this.source = source; + super(driverContext, source); + this.val = val; this.scratch = scratch; this.codePoint = codePoint; - this.val = val; - this.driverContext = driverContext; } @Override - public Block eval(Page page) { - try (BytesRefBlock valBlock = (BytesRefBlock) val.eval(page)) { - BytesRefVector valVector = valBlock.asVector(); - if (valVector == null) { - return eval(page.getPositionCount(), valBlock); + public EvalOperator.ExpressionEvaluator next() { + return val; + } + + @Override + public Block evalVector(Vector v) { + BytesRefVector vector = (BytesRefVector) v; + OrdinalBytesRefVector ordinals = vector.asOrdinals(); + if (ordinals != null) { + return evalOrdinals(ordinals); + } + int positionCount = v.getPositionCount(); + BytesRef scratchPad = new BytesRef(); + if (vector.isConstant()) { + return driverContext.blockFactory().newConstantBytesRefBlockWith(evalValue(vector, 0, scratchPad), positionCount); + } + try (BytesRefBlock.Builder builder = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { + for (int p = 0; p < positionCount; p++) { + builder.appendBytesRef(evalValue(vector, p, scratchPad)); } - return eval(page.getPositionCount(), valVector).asBlock(); + return builder.build(); } } - @Override - public long baseRamBytesUsed() { - long baseRamBytesUsed = BASE_RAM_BYTES_USED; - baseRamBytesUsed += val.baseRamBytesUsed(); - return baseRamBytesUsed; + private BytesRef evalValue(BytesRefVector container, int index, BytesRef scratchPad) { + BytesRef value = container.getBytesRef(index, scratchPad); + return ToAscii.process(value, this.scratch, this.codePoint); } - public BytesRefBlock eval(int positionCount, BytesRefBlock valBlock) { - try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { - BytesRef valScratch = new BytesRef(); - position: for (int p = 0; p < positionCount; p++) { - switch (valBlock.getValueCount(p)) { - case 0: - result.appendNull(); - continue position; - case 1: - break; - default: - warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); - result.appendNull(); - continue position; + @Override + public Block evalBlock(Block b) { + BytesRefBlock block = (BytesRefBlock) b; + int positionCount = block.getPositionCount(); + try (BytesRefBlock.Builder builder = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { + BytesRef scratchPad = new BytesRef(); + for (int p = 0; p < positionCount; p++) { + int valueCount = block.getValueCount(p); + int start = block.getFirstValueIndex(p); + int end = start + valueCount; + boolean positionOpened = false; + boolean valuesAppended = false; + for (int i = start; i < end; i++) { + BytesRef value = evalValue(block, i, scratchPad); + if (positionOpened == false && valueCount > 1) { + builder.beginPositionEntry(); + positionOpened = true; + } + builder.appendBytesRef(value); + valuesAppended = true; + } + if (valuesAppended == false) { + builder.appendNull(); + } else if (positionOpened) { + builder.endPositionEntry(); } - BytesRef val = valBlock.getBytesRef(valBlock.getFirstValueIndex(p), valScratch); - result.appendBytesRef(ToAscii.process(this.scratch, this.codePoint, val)); } - return result.build(); + return builder.build(); } } - public BytesRefVector eval(int positionCount, BytesRefVector valVector) { - try(BytesRefVector.Builder result = driverContext.blockFactory().newBytesRefVectorBuilder(positionCount)) { - BytesRef valScratch = new BytesRef(); - position: for (int p = 0; p < positionCount; p++) { - BytesRef val = valVector.getBytesRef(p, valScratch); - result.appendBytesRef(ToAscii.process(this.scratch, this.codePoint, val)); + private BytesRef evalValue(BytesRefBlock container, int index, BytesRef scratchPad) { + BytesRef value = container.getBytesRef(index, scratchPad); + return ToAscii.process(value, this.scratch, this.codePoint); + } + + private Block evalOrdinals(OrdinalBytesRefVector v) { + int positionCount = v.getDictionaryVector().getPositionCount(); + BytesRef scratchPad = new BytesRef(); + try (BytesRefVector.Builder builder = driverContext.blockFactory().newBytesRefVectorBuilder(positionCount)) { + for (int p = 0; p < positionCount; p++) { + builder.appendBytesRef(evalValue(v.getDictionaryVector(), p, scratchPad)); } - return result.build(); + IntVector ordinals = v.getOrdinalsVector(); + ordinals.incRef(); + return new OrdinalBytesRefVector(ordinals, builder.build()).asBlock(); } } @@ -109,42 +131,37 @@ public String toString() { @Override public void close() { - Releasables.closeExpectNoException(scratch, val); + Releasables.closeExpectNoException(val, scratch); } - private Warnings warnings() { - if (warnings == null) { - this.warnings = Warnings.createWarnings( - driverContext.warningsMode(), - source.source().getLineNumber(), - source.source().getColumnNumber(), - source.text() - ); - } - return warnings; + @Override + public long baseRamBytesUsed() { + long baseRamBytesUsed = BASE_RAM_BYTES_USED; + baseRamBytesUsed += val.baseRamBytesUsed(); + return baseRamBytesUsed; } - static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + public static class Factory implements EvalOperator.ExpressionEvaluator.Factory { private final Source source; + private final EvalOperator.ExpressionEvaluator.Factory val; + private final Function scratch; private final Function codePoint; - private final EvalOperator.ExpressionEvaluator.Factory val; - - public Factory(Source source, Function scratch, - Function codePoint, - EvalOperator.ExpressionEvaluator.Factory val) { + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory val, + Function scratch, + Function codePoint) { this.source = source; + this.val = val; this.scratch = scratch; this.codePoint = codePoint; - this.val = val; } @Override public ToAsciiEvaluator get(DriverContext context) { - return new ToAsciiEvaluator(source, scratch.apply(context), codePoint.apply(context), val.get(context), context); + return new ToAsciiEvaluator(source, val.get(context), scratch.apply(context), codePoint.apply(context), context); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index e354232965e49..dc576ae16fafc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -175,7 +175,6 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StY; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StYMax; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StYMin; -import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToAscii; import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.ByteLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; @@ -198,6 +197,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.Split; import org.elasticsearch.xpack.esql.expression.function.scalar.string.StartsWith; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Substring; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToAscii; import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToLower; import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToUpper; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Trim; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java index 978e9f64ace7e..99faa1d372a99 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java @@ -36,7 +36,6 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.math.RoundTo; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Tau; import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce; -import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToAscii; import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Contains; @@ -54,6 +53,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.Split; import org.elasticsearch.xpack.esql.expression.function.scalar.string.StartsWith; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Substring; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToAscii; import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToLower; import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToUpper; import org.elasticsearch.xpack.esql.expression.predicate.logical.And; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAscii.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAscii.java index 5c555872ad682..9a0f9c8dc222a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAscii.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ToAscii.java @@ -12,7 +12,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.compute.ann.ConvertEvaluator; import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; @@ -85,9 +85,9 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { var field = toEvaluator.apply(field()); return new ToAsciiEvaluator.Factory( source(), + field, context -> new BreakingBytesRefBuilder(context.breaker(), "to_ascii"), - context -> new UnicodeUtil.UTF8CodePoint(), - field + context -> new UnicodeUtil.UTF8CodePoint() ); } @@ -101,11 +101,11 @@ protected NodeInfo info() { return NodeInfo.create(this, ToAscii::new, field()); } - @Evaluator + @ConvertEvaluator static BytesRef process( + BytesRef val, @Fixed(includeInToString = false, scope = THREAD_LOCAL) BreakingBytesRefBuilder scratch, - @Fixed(includeInToString = false, scope = THREAD_LOCAL) UnicodeUtil.UTF8CodePoint codePoint, - BytesRef val + @Fixed(includeInToString = false, scope = THREAD_LOCAL) UnicodeUtil.UTF8CodePoint codePoint ) { // Pre-reserve at least as much as the input. scratch.grow(val.length);