From 75fafc91fee010ca1a7c719d07df408bad265b88 Mon Sep 17 00:00:00 2001 From: "Philip K. Warren" Date: Fri, 27 Mar 2026 16:20:13 -0500 Subject: [PATCH 1/7] Add string extensions quote and reverse Add `strings.quote` and `reverse` extensions to match Go implementations. --- .../cel/extensions/CelStringExtensions.java | 76 +++++++++++++++++++ .../extensions/CelStringExtensionsTest.java | 54 +++++++++++++ 2 files changed, 130 insertions(+) diff --git a/extensions/src/main/java/dev/cel/extensions/CelStringExtensions.java b/extensions/src/main/java/dev/cel/extensions/CelStringExtensions.java index 10caa7db8..faf30c2b2 100644 --- a/extensions/src/main/java/dev/cel/extensions/CelStringExtensions.java +++ b/extensions/src/main/java/dev/cel/extensions/CelStringExtensions.java @@ -137,6 +137,17 @@ public enum Function { SimpleType.STRING, SimpleType.STRING)), CelFunctionBinding.from("string_lower_ascii", String.class, Ascii::toLowerCase)), + QUOTE( + CelFunctionDecl.newFunctionDeclaration( + "strings.quote", + CelOverloadDecl.newGlobalOverload( + "strings_quote", + "Takes the given string and makes it safe to print (without any formatting" + + " due to escape sequences). 
If any invalid UTF-8 characters are" + + " encountered, they are replaced with \\uFFFD.", + SimpleType.STRING, + ImmutableList.of(SimpleType.STRING))), + CelFunctionBinding.from("strings_quote", String.class, CelStringExtensions::quote)), REPLACE( CelFunctionDecl.newFunctionDeclaration( "replace", @@ -164,6 +175,16 @@ public enum Function { "string_replace_string_string_int", ImmutableList.of(String.class, String.class, String.class, Long.class), CelStringExtensions::replace)), + REVERSE( + CelFunctionDecl.newFunctionDeclaration( + "reverse", + CelOverloadDecl.newMemberOverload( + "string_reverse", + "Returns a new string whose characters are the same as the target string," + + " only formatted in reverse order.", + SimpleType.STRING, + SimpleType.STRING)), + CelFunctionBinding.from("string_reverse", String.class, CelStringExtensions::reverse)), SPLIT( CelFunctionDecl.newFunctionDeclaration( "split", @@ -449,6 +470,57 @@ private static Long lastIndexOf(CelCodePointArray str, CelCodePointArray substr, return -1L; } + private static String quote(String s) { + StringBuilder sb = new StringBuilder(s.length() + 2); + sb.append('"'); + for (int i = 0; i < s.length(); ) { + int codePoint = s.codePointAt(i); + if (!Character.isValidCodePoint(codePoint) + || Character.isLowSurrogate(s.charAt(i)) + || (Character.isHighSurrogate(s.charAt(i)) + && (i + 1 >= s.length() || !Character.isLowSurrogate(s.charAt(i + 1))))) { + sb.append('\uFFFD'); + i++; + continue; + } + switch (codePoint) { + case '\u0007': + sb.append("\\a"); + break; + case '\b': + sb.append("\\b"); + break; + case '\f': + sb.append("\\f"); + break; + case '\n': + sb.append("\\n"); + break; + case '\r': + sb.append("\\r"); + break; + case '\t': + sb.append("\\t"); + break; + case '\u000B': + sb.append("\\v"); + break; + case '\\': + sb.append("\\\\"); + break; + case '"': + sb.append("\\\""); + break; + default: + sb.appendCodePoint(codePoint); + break; + } + i += Character.charCount(codePoint); + } + 
sb.append('"'); + return sb.toString(); + } + private static String replaceAll(Object[] objects) { return replace((String) objects[0], (String) objects[1], (String) objects[2], -1); } @@ -504,6 +576,10 @@ private static String replace(String text, String searchString, String replaceme return sb.append(textCpa.slice(start, textCpa.length())).toString(); } + private static String reverse(String s) { + return new StringBuilder(s).reverse().toString(); + } + private static List split(String str, String separator) { return split(str, separator, Integer.MAX_VALUE); } diff --git a/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java b/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java index 6ea9b702c..ad0d6d679 100644 --- a/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java +++ b/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java @@ -70,7 +70,9 @@ public void library() { "lastIndexOf", "lowerAscii", "replace", + "reverse", "split", + "strings.quote", "substring", "trim", "upperAscii"); @@ -1467,6 +1469,58 @@ public void stringExtension_functionSubset_success() throws Exception { assertThat(evaluatedResult).isEqualTo(true); } + @Test + @TestParameters("{string: 'abcd', expectedResult: 'dcba'}") + @TestParameters("{string: '', expectedResult: ''}") + @TestParameters("{string: 'a', expectedResult: 'a'}") + @TestParameters("{string: 'hello world', expectedResult: 'dlrow olleh'}") + @TestParameters("{string: 'abκ°€cd', expectedResult: 'dcκ°€ba'}") + public void reverse_success(String string, String expectedResult) throws Exception { + CelAbstractSyntaxTree ast = COMPILER.compile("s.reverse()").getAst(); + CelRuntime.Program program = RUNTIME.createProgram(ast); + + Object evaluatedResult = program.eval(ImmutableMap.of("s", string)); + + assertThat(evaluatedResult).isEqualTo(expectedResult); + } + + @Test + public void reverse_unicode() throws Exception { + CelAbstractSyntaxTree ast = 
COMPILER.compile("s.reverse()").getAst(); + CelRuntime.Program program = RUNTIME.createProgram(ast); + + Object evaluatedResult = program.eval(ImmutableMap.of("s", "πŸ˜πŸ˜‘πŸ˜¦")); + + assertThat(evaluatedResult).isEqualTo("πŸ˜¦πŸ˜‘πŸ˜"); + } + + @Test + @TestParameters("{string: 'hello', expectedResult: '\"hello\"'}") + @TestParameters("{string: '', expectedResult: '\"\"'}") + @TestParameters("{string: 'contains \\\"quotes\\\"', expectedResult: '\"contains \\\\\\\"quotes\\\\\\\"\"'}") + public void quote_success(String string, String expectedResult) throws Exception { + CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst(); + CelRuntime.Program program = RUNTIME.createProgram(ast); + + Object evaluatedResult = program.eval(ImmutableMap.of("s", string)); + + assertThat(evaluatedResult).isEqualTo(expectedResult); + } + + @Test + public void quote_escapesSpecialCharacters() throws Exception { + CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst(); + CelRuntime.Program program = RUNTIME.createProgram(ast); + + Object evaluatedResult = + program.eval( + ImmutableMap.of( + "s", "\u0007bell\u000Bvtab\bback\ffeed\rret\nline\ttab\\slash κ°€ 😁")); + + assertThat(evaluatedResult) + .isEqualTo("\"\\abell\\vvtab\\bback\\ffeed\\rret\\nline\\ttab\\\\slash κ°€ 😁\""); + } + @Test public void stringExtension_compileUnallowedFunction_throws() { CelCompiler celCompiler = From 6207811efff2d3b1d3dd5f0528e67906a90a955c Mon Sep 17 00:00:00 2001 From: "Philip K. 
Warren" Date: Fri, 27 Mar 2026 17:48:43 -0500 Subject: [PATCH 2/7] Enable strings.quote conformance tests --- conformance/src/test/java/dev/cel/conformance/BUILD.bazel | 2 -- 1 file changed, 2 deletions(-) diff --git a/conformance/src/test/java/dev/cel/conformance/BUILD.bazel b/conformance/src/test/java/dev/cel/conformance/BUILD.bazel index fb2b1a159..ea9041433 100644 --- a/conformance/src/test/java/dev/cel/conformance/BUILD.bazel +++ b/conformance/src/test/java/dev/cel/conformance/BUILD.bazel @@ -120,7 +120,6 @@ _TESTS_TO_SKIP_LEGACY = [ # Skip until fixed. "fields/qualified_identifier_resolution/map_value_repeat_key_heterogeneous", # TODO: Add strings.format and strings.quote. - "string_ext/quote", "string_ext/format", "string_ext/format_errors", @@ -149,7 +148,6 @@ _TESTS_TO_SKIP_LEGACY = [ _TESTS_TO_SKIP_PLANNER = [ # TODO: Add strings.format and strings.quote. - "string_ext/quote", "string_ext/format", "string_ext/format_errors", From 0e1a30f5eb31d0f1a313a755d253ca0f1a3389d8 Mon Sep 17 00:00:00 2001 From: "Philip K. Warren" Date: Fri, 27 Mar 2026 17:55:53 -0500 Subject: [PATCH 3/7] Fix comments in bazel TESTS_TO_SKIP --- conformance/src/test/java/dev/cel/conformance/BUILD.bazel | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conformance/src/test/java/dev/cel/conformance/BUILD.bazel b/conformance/src/test/java/dev/cel/conformance/BUILD.bazel index ea9041433..c0e7ad2bc 100644 --- a/conformance/src/test/java/dev/cel/conformance/BUILD.bazel +++ b/conformance/src/test/java/dev/cel/conformance/BUILD.bazel @@ -119,7 +119,7 @@ _TESTS_TO_SKIP_LEGACY = [ # Skip until fixed. "fields/qualified_identifier_resolution/map_value_repeat_key_heterogeneous", - # TODO: Add strings.format and strings.quote. + # TODO: Add strings.format. "string_ext/format", "string_ext/format_errors", @@ -147,7 +147,7 @@ _TESTS_TO_SKIP_LEGACY = [ ] _TESTS_TO_SKIP_PLANNER = [ - # TODO: Add strings.format and strings.quote. + # TODO: Add strings.format. 
"string_ext/format", "string_ext/format_errors", From 57e795ad58d77dd462864a8e322fecd4a56514be Mon Sep 17 00:00:00 2001 From: "Philip K. Warren" Date: Fri, 27 Mar 2026 18:13:28 -0500 Subject: [PATCH 4/7] Attempt to fix failing test --- .../src/test/java/dev/cel/extensions/CelExtensionsTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/extensions/src/test/java/dev/cel/extensions/CelExtensionsTest.java b/extensions/src/test/java/dev/cel/extensions/CelExtensionsTest.java index 61922f70f..192630ea3 100644 --- a/extensions/src/test/java/dev/cel/extensions/CelExtensionsTest.java +++ b/extensions/src/test/java/dev/cel/extensions/CelExtensionsTest.java @@ -168,6 +168,7 @@ public void getAllFunctionNames() { "join", "lastIndexOf", "lowerAscii", + "strings.quote", "replace", "split", "substring", From 943ae333723ed1ffe05975cdd4437d1b35a546fe Mon Sep 17 00:00:00 2001 From: "Philip K. Warren" Date: Sun, 29 Mar 2026 10:43:53 -0500 Subject: [PATCH 5/7] Add additional tests and address review feedback --- .../cel/extensions/CelStringExtensions.java | 18 +++++++-- .../extensions/CelStringExtensionsTest.java | 37 +++++++++++++++++-- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/extensions/src/main/java/dev/cel/extensions/CelStringExtensions.java b/extensions/src/main/java/dev/cel/extensions/CelStringExtensions.java index faf30c2b2..e89b81071 100644 --- a/extensions/src/main/java/dev/cel/extensions/CelStringExtensions.java +++ b/extensions/src/main/java/dev/cel/extensions/CelStringExtensions.java @@ -475,10 +475,7 @@ private static String quote(String s) { sb.append('"'); for (int i = 0; i < s.length(); ) { int codePoint = s.codePointAt(i); - if (!Character.isValidCodePoint(codePoint) - || Character.isLowSurrogate(s.charAt(i)) - || (Character.isHighSurrogate(s.charAt(i)) - && (i + 1 >= s.length() || !Character.isLowSurrogate(s.charAt(i + 1))))) { + if (isMalformedUtf16(s, i, codePoint)) { sb.append('\uFFFD'); i++; continue; @@ -521,6 +518,19 @@ private 
static String quote(String s) { return sb.toString(); } + private static boolean isMalformedUtf16(String s, int index, int codePoint) { + char currentChar = s.charAt(index); + if (!Character.isValidCodePoint(codePoint)) { + return true; + } + if (Character.isLowSurrogate(currentChar)) { + return true; + } + // Check for unpaired high surrogate + return Character.isHighSurrogate(currentChar) + && (index + 1 >= s.length() || !Character.isLowSurrogate(s.charAt(index + 1))); + } + private static String replaceAll(Object[] objects) { return replace((String) objects[0], (String) objects[1], (String) objects[2], -1); } diff --git a/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java b/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java index ad0d6d679..58a1bff99 100644 --- a/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java +++ b/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java @@ -33,6 +33,8 @@ import dev.cel.runtime.CelEvaluationException; import dev.cel.runtime.CelRuntime; import dev.cel.runtime.CelRuntimeFactory; + +import java.nio.charset.StandardCharsets; import java.util.List; import org.junit.Test; import org.junit.runner.RunWith; @@ -1485,19 +1487,23 @@ public void reverse_success(String string, String expectedResult) throws Excepti } @Test - public void reverse_unicode() throws Exception { + @TestParameters("{string: 'πŸ˜πŸ˜‘πŸ˜¦', expectedResult: 'πŸ˜¦πŸ˜‘πŸ˜'}") + @TestParameters("{string: '\u180e\u200b\u200c\u200d\u2060\ufeff', expectedResult: '\ufeff\u2060\u200d\u200c\u200b\u180e'}") + public void reverse_unicode(String string, String expectedResult) throws Exception { CelAbstractSyntaxTree ast = COMPILER.compile("s.reverse()").getAst(); CelRuntime.Program program = RUNTIME.createProgram(ast); - Object evaluatedResult = program.eval(ImmutableMap.of("s", "πŸ˜πŸ˜‘πŸ˜¦")); + Object evaluatedResult = program.eval(ImmutableMap.of("s", string)); - 
assertThat(evaluatedResult).isEqualTo("πŸ˜¦πŸ˜‘πŸ˜"); + assertThat(evaluatedResult).isEqualTo(expectedResult); } @Test @TestParameters("{string: 'hello', expectedResult: '\"hello\"'}") @TestParameters("{string: '', expectedResult: '\"\"'}") @TestParameters("{string: 'contains \\\"quotes\\\"', expectedResult: '\"contains \\\\\\\"quotes\\\\\\\"\"'}") + @TestParameters("{string: 'ends with \\\\', expectedResult: '\"ends with \\\\\\\\\"'}") + @TestParameters("{string: '\\\\ starts with', expectedResult: '\"\\\\\\\\ starts with\"'}") public void quote_success(String string, String expectedResult) throws Exception { CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst(); CelRuntime.Program program = RUNTIME.createProgram(ast); @@ -1507,6 +1513,18 @@ public void quote_success(String string, String expectedResult) throws Exception assertThat(evaluatedResult).isEqualTo(expectedResult); } + @Test + public void quote_singleWithDoubleQuotes() throws Exception { + CelAbstractSyntaxTree ast = COMPILER.compile( + "strings.quote('single-quote with \"double quote\"') == \"\\\"single-quote with \\\\\\\"double quote\\\\\\\"\\\"\"" + ).getAst(); + CelRuntime.Program program = RUNTIME.createProgram(ast); + + Object evaluatedResult = program.eval(); + + assertThat(evaluatedResult).isEqualTo(true); + } + @Test public void quote_escapesSpecialCharacters() throws Exception { CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst(); @@ -1521,6 +1539,19 @@ public void quote_escapesSpecialCharacters() throws Exception { .isEqualTo("\"\\abell\\vvtab\\bback\\ffeed\\rret\\nline\\ttab\\\\slash κ°€ 😁\""); } + @Test + public void quote_escapesMalformed() throws Exception { + CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst(); + CelRuntime.Program program = RUNTIME.createProgram(ast); + + Object evaluatedResult = + program.eval( + ImmutableMap.of( + "s", new String(new byte[]{'f','i','l','l','e','r',' ',(byte)0x9f}, 
StandardCharsets.UTF_8))); + + assertThat(evaluatedResult).isEqualTo("\"filler \uFFFD\""); + } + @Test public void stringExtension_compileUnallowedFunction_throws() { CelCompiler celCompiler = From 03d0a3621f721e05bbd5eecf6234a588246a397f Mon Sep 17 00:00:00 2001 From: "Philip K. Warren" Date: Sun, 29 Mar 2026 11:57:43 -0500 Subject: [PATCH 6/7] Additional malformed unicode tests --- .../extensions/CelStringExtensionsTest.java | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java b/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java index 58a1bff99..0a3c595be 100644 --- a/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java +++ b/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java @@ -1540,16 +1540,28 @@ public void quote_escapesSpecialCharacters() throws Exception { } @Test - public void quote_escapesMalformed() throws Exception { + @TestParameters({"{rawString: !!binary 'ZmlsbGVyIJ8=', expectedResult: '\"filler \uFFFD\"'}"}) // "filler \x9f" + public void quote_escapesMalformed(byte[] rawString, String expectedResult) throws Exception { CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst(); CelRuntime.Program program = RUNTIME.createProgram(ast); - Object evaluatedResult = - program.eval( - ImmutableMap.of( - "s", new String(new byte[]{'f','i','l','l','e','r',' ',(byte)0x9f}, StandardCharsets.UTF_8))); + Object evaluatedResult = program.eval(ImmutableMap.of("s", new String(rawString, StandardCharsets.UTF_8))); + + assertThat(evaluatedResult).isEqualTo(expectedResult); + } - assertThat(evaluatedResult).isEqualTo("\"filler \uFFFD\""); + @Test + public void quote_escapesMalformed_endWithHighSurrogate() throws Exception { + CelRuntime.Program program = RUNTIME.createProgram(COMPILER.compile("strings.quote(s)").getAst()); + assertThat(program.eval(ImmutableMap.of("s", "end with high 
surrogate \uD83D"))) + .isEqualTo("\"end with high surrogate \uFFFD\""); + } + + @Test + public void quote_escapesMalformed_unpairedHighSurrogate() throws Exception { + CelRuntime.Program program = RUNTIME.createProgram(COMPILER.compile("strings.quote(s)").getAst()); + assertThat(program.eval(ImmutableMap.of("s", "bad pair \uD83DA"))) + .isEqualTo("\"bad pair \uFFFDA\""); } @Test From 79ff206b89f6b54767e4dd84f648b049d560ea89 Mon Sep 17 00:00:00 2001 From: "Philip K. Warren" Date: Sun, 29 Mar 2026 13:53:13 -0500 Subject: [PATCH 7/7] Add quote and reverse functions to docs --- .../main/java/dev/cel/extensions/README.md | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/extensions/src/main/java/dev/cel/extensions/README.md b/extensions/src/main/java/dev/cel/extensions/README.md index 10c5217e8..c3fbf8c54 100644 --- a/extensions/src/main/java/dev/cel/extensions/README.md +++ b/extensions/src/main/java/dev/cel/extensions/README.md @@ -474,6 +474,19 @@ Examples: 'TacoCat'.lowerAscii() // returns 'tacocat' 'TacoCÆt Xii'.lowerAscii() // returns 'tacocÆt xii' +### Quote + +Takes the given string and makes it safe to print (without any formatting due +to escape sequences). +If any invalid UTF-8 characters are encountered, they are replaced with \uFFFD. + + strings.quote(<string>) -> <string> + +Examples: + + strings.quote('single-quote with "double quote"') // returns '"single-quote with \"double quote\""' + strings.quote("two escape sequences \a\n") // returns '"two escape sequences \\a\\n"' + ### Replace Returns a new string based on the target, which replaces the occurrences of a @@ -493,6 +506,20 @@ Examples: 'hello hello'.replace('he', 'we', 1) // returns 'wello hello' 'hello hello'.replace('he', 'we', 0) // returns 'hello hello' +### Reverse + +Returns a new string whose characters are the same as the target string, only +formatted in reverse order. +This function relies on converting strings to Unicode code point arrays in +order to reverse.
+ + <string>.reverse() -> <string> + +Examples: + + 'gums'.reverse() // returns 'smug' + 'John Smith'.reverse() // returns 'htimS nhoJ' + ### Split Returns a mutable list of strings split from the input by the given separator. The