diff --git a/conformance/src/test/java/dev/cel/conformance/BUILD.bazel b/conformance/src/test/java/dev/cel/conformance/BUILD.bazel index fb2b1a159..c0e7ad2bc 100644 --- a/conformance/src/test/java/dev/cel/conformance/BUILD.bazel +++ b/conformance/src/test/java/dev/cel/conformance/BUILD.bazel @@ -119,8 +119,7 @@ _TESTS_TO_SKIP_LEGACY = [ # Skip until fixed. "fields/qualified_identifier_resolution/map_value_repeat_key_heterogeneous", - # TODO: Add strings.format and strings.quote. - "string_ext/quote", + # TODO: Add strings.format. "string_ext/format", "string_ext/format_errors", @@ -148,8 +147,7 @@ _TESTS_TO_SKIP_LEGACY = [ ] _TESTS_TO_SKIP_PLANNER = [ - # TODO: Add strings.format and strings.quote. - "string_ext/quote", + # TODO: Add strings.format. "string_ext/format", "string_ext/format_errors", diff --git a/extensions/src/main/java/dev/cel/extensions/CelStringExtensions.java b/extensions/src/main/java/dev/cel/extensions/CelStringExtensions.java index 10caa7db8..e89b81071 100644 --- a/extensions/src/main/java/dev/cel/extensions/CelStringExtensions.java +++ b/extensions/src/main/java/dev/cel/extensions/CelStringExtensions.java @@ -137,6 +137,17 @@ public enum Function { SimpleType.STRING, SimpleType.STRING)), CelFunctionBinding.from("string_lower_ascii", String.class, Ascii::toLowerCase)), + QUOTE( + CelFunctionDecl.newFunctionDeclaration( + "strings.quote", + CelOverloadDecl.newGlobalOverload( + "strings_quote", + "Takes the given string and makes it safe to print (without any formatting" + + " due to escape sequences). 
If any invalid UTF-8 characters are" + + " encountered, they are replaced with \\uFFFD.", + SimpleType.STRING, + ImmutableList.of(SimpleType.STRING))), + CelFunctionBinding.from("strings_quote", String.class, CelStringExtensions::quote)), REPLACE( CelFunctionDecl.newFunctionDeclaration( "replace", @@ -164,6 +175,16 @@ public enum Function { "string_replace_string_string_int", ImmutableList.of(String.class, String.class, String.class, Long.class), CelStringExtensions::replace)), + REVERSE( + CelFunctionDecl.newFunctionDeclaration( + "reverse", + CelOverloadDecl.newMemberOverload( + "string_reverse", + "Returns a new string whose characters are the same as the target string," + + " only formatted in reverse order.", + SimpleType.STRING, + SimpleType.STRING)), + CelFunctionBinding.from("string_reverse", String.class, CelStringExtensions::reverse)), SPLIT( CelFunctionDecl.newFunctionDeclaration( "split", @@ -449,6 +470,67 @@ private static Long lastIndexOf(CelCodePointArray str, CelCodePointArray substr, return -1L; } + private static String quote(String s) { + StringBuilder sb = new StringBuilder(s.length() + 2); + sb.append('"'); + for (int i = 0; i < s.length(); ) { + int codePoint = s.codePointAt(i); + if (isMalformedUtf16(s, i, codePoint)) { + sb.append('\uFFFD'); + i++; + continue; + } + switch (codePoint) { + case '\u0007': + sb.append("\\a"); + break; + case '\b': + sb.append("\\b"); + break; + case '\f': + sb.append("\\f"); + break; + case '\n': + sb.append("\\n"); + break; + case '\r': + sb.append("\\r"); + break; + case '\t': + sb.append("\\t"); + break; + case '\u000B': + sb.append("\\v"); + break; + case '\\': + sb.append("\\\\"); + break; + case '"': + sb.append("\\\""); + break; + default: + sb.appendCodePoint(codePoint); + break; + } + i += Character.charCount(codePoint); + } + sb.append('"'); + return sb.toString(); + } + + private static boolean isMalformedUtf16(String s, int index, int codePoint) { + char currentChar = s.charAt(index); + if 
(!Character.isValidCodePoint(codePoint)) { + return true; + } + if (Character.isLowSurrogate(currentChar)) { + return true; + } + // Check for unpaired high surrogate + return Character.isHighSurrogate(currentChar) + && (index + 1 >= s.length() || !Character.isLowSurrogate(s.charAt(index + 1))); + } + private static String replaceAll(Object[] objects) { return replace((String) objects[0], (String) objects[1], (String) objects[2], -1); } @@ -504,6 +586,10 @@ private static String replace(String text, String searchString, String replaceme return sb.append(textCpa.slice(start, textCpa.length())).toString(); } + private static String reverse(String s) { + return new StringBuilder(s).reverse().toString(); + } + private static List split(String str, String separator) { return split(str, separator, Integer.MAX_VALUE); } diff --git a/extensions/src/main/java/dev/cel/extensions/README.md b/extensions/src/main/java/dev/cel/extensions/README.md index 10c5217e8..c3fbf8c54 100644 --- a/extensions/src/main/java/dev/cel/extensions/README.md +++ b/extensions/src/main/java/dev/cel/extensions/README.md @@ -474,6 +474,19 @@ Examples: 'TacoCat'.lowerAscii() // returns 'tacocat' 'TacoCÆt Xii'.lowerAscii() // returns 'tacocÆt xii' +### Quote + +Takes the given string and makes it safe to print (without any formatting due +to escape sequences). +If any invalid UTF-8 characters are encountered, they are replaced with \uFFFD. 
+ + strings.quote(<string>) -> <string> + +Examples: + + strings.quote('single-quote with "double quote"') // returns '"single-quote with \"double quote\""' + strings.quote("two escape sequences \a\n") // returns '"two escape sequences \\a\\n"' + ### Replace Returns a new string based on the target, which replaces the occurrences of a @@ -493,6 +506,20 @@ Examples: 'hello hello'.replace('he', 'we', 1) // returns 'wello hello' 'hello hello'.replace('he', 'we', 0) // returns 'hello hello' +### Reverse + +Returns a new string whose characters are the same as the target string, only +formatted in reverse order. +This function relies on converting strings to Unicode code point arrays in +order to reverse. + + <string>.reverse() -> <string> + +Examples: + + 'gums'.reverse() // returns 'smug' + 'John Smith'.reverse() // returns 'htimS nhoJ' + ### Split Returns a mutable list of strings split from the input by the given separator. The diff --git a/extensions/src/test/java/dev/cel/extensions/CelExtensionsTest.java b/extensions/src/test/java/dev/cel/extensions/CelExtensionsTest.java index 61922f70f..192630ea3 100644 --- a/extensions/src/test/java/dev/cel/extensions/CelExtensionsTest.java +++ b/extensions/src/test/java/dev/cel/extensions/CelExtensionsTest.java @@ -168,6 +168,7 @@ public void getAllFunctionNames() { "join", "lastIndexOf", "lowerAscii", + "strings.quote", "replace", "split", "substring", diff --git a/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java b/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java index 6ea9b702c..0a3c595be 100644 --- a/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java +++ b/extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java @@ -33,6 +33,8 @@ import dev.cel.runtime.CelEvaluationException; import dev.cel.runtime.CelRuntime; import dev.cel.runtime.CelRuntimeFactory; + +import java.nio.charset.StandardCharsets; import java.util.List; import org.junit.Test; import org.junit.runner.RunWith; @@ 
-70,7 +72,9 @@ public void library() { "lastIndexOf", "lowerAscii", "replace", + "reverse", "split", + "strings.quote", "substring", "trim", "upperAscii"); @@ -1467,6 +1471,99 @@ public void stringExtension_functionSubset_success() throws Exception { assertThat(evaluatedResult).isEqualTo(true); } + @Test + @TestParameters("{string: 'abcd', expectedResult: 'dcba'}") + @TestParameters("{string: '', expectedResult: ''}") + @TestParameters("{string: 'a', expectedResult: 'a'}") + @TestParameters("{string: 'hello world', expectedResult: 'dlrow olleh'}") + @TestParameters("{string: 'ab가cd', expectedResult: 'dc가ba'}") + public void reverse_success(String string, String expectedResult) throws Exception { + CelAbstractSyntaxTree ast = COMPILER.compile("s.reverse()").getAst(); + CelRuntime.Program program = RUNTIME.createProgram(ast); + + Object evaluatedResult = program.eval(ImmutableMap.of("s", string)); + + assertThat(evaluatedResult).isEqualTo(expectedResult); + } + + @Test + @TestParameters("{string: '😁😑😦', expectedResult: '😦😑😁'}") + @TestParameters("{string: '\u180e\u200b\u200c\u200d\u2060\ufeff', expectedResult: '\ufeff\u2060\u200d\u200c\u200b\u180e'}") + public void reverse_unicode(String string, String expectedResult) throws Exception { + CelAbstractSyntaxTree ast = COMPILER.compile("s.reverse()").getAst(); + CelRuntime.Program program = RUNTIME.createProgram(ast); + + Object evaluatedResult = program.eval(ImmutableMap.of("s", string)); + + assertThat(evaluatedResult).isEqualTo(expectedResult); + } + + @Test + @TestParameters("{string: 'hello', expectedResult: '\"hello\"'}") + @TestParameters("{string: '', expectedResult: '\"\"'}") + @TestParameters("{string: 'contains \\\"quotes\\\"', expectedResult: '\"contains \\\\\\\"quotes\\\\\\\"\"'}") + @TestParameters("{string: 'ends with \\\\', expectedResult: '\"ends with \\\\\\\\\"'}") + @TestParameters("{string: '\\\\ starts with', expectedResult: '\"\\\\\\\\ starts with\"'}") + public void quote_success(String string, 
String expectedResult) throws Exception { + CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst(); + CelRuntime.Program program = RUNTIME.createProgram(ast); + + Object evaluatedResult = program.eval(ImmutableMap.of("s", string)); + + assertThat(evaluatedResult).isEqualTo(expectedResult); + } + + @Test + public void quote_singleWithDoubleQuotes() throws Exception { + CelAbstractSyntaxTree ast = COMPILER.compile( + "strings.quote('single-quote with \"double quote\"') == \"\\\"single-quote with \\\\\\\"double quote\\\\\\\"\\\"\"" + ).getAst(); + CelRuntime.Program program = RUNTIME.createProgram(ast); + + Object evaluatedResult = program.eval(); + + assertThat(evaluatedResult).isEqualTo(true); + } + + @Test + public void quote_escapesSpecialCharacters() throws Exception { + CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst(); + CelRuntime.Program program = RUNTIME.createProgram(ast); + + Object evaluatedResult = + program.eval( + ImmutableMap.of( + "s", "\u0007bell\u000Bvtab\bback\ffeed\rret\nline\ttab\\slash 가 😁")); + + assertThat(evaluatedResult) + .isEqualTo("\"\\abell\\vvtab\\bback\\ffeed\\rret\\nline\\ttab\\\\slash 가 😁\""); + } + + @Test + @TestParameters({"{rawString: !!binary 'ZmlsbGVyIJ8=', expectedResult: '\"filler \uFFFD\"'}"}) // "filler \x9f" + public void quote_escapesMalformed(byte[] rawString, String expectedResult) throws Exception { + CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst(); + CelRuntime.Program program = RUNTIME.createProgram(ast); + + Object evaluatedResult = program.eval(ImmutableMap.of("s", new String(rawString, StandardCharsets.UTF_8))); + + assertThat(evaluatedResult).isEqualTo(expectedResult); + } + + @Test + public void quote_escapesMalformed_endWithHighSurrogate() throws Exception { + CelRuntime.Program program = RUNTIME.createProgram(COMPILER.compile("strings.quote(s)").getAst()); + assertThat(program.eval(ImmutableMap.of("s", "end with high surrogate \uD83D"))) + 
.isEqualTo("\"end with high surrogate \uFFFD\""); + } + + @Test + public void quote_escapesMalformed_unpairedHighSurrogate() throws Exception { + CelRuntime.Program program = RUNTIME.createProgram(COMPILER.compile("strings.quote(s)").getAst()); + assertThat(program.eval(ImmutableMap.of("s", "bad pair \uD83DA"))) + .isEqualTo("\"bad pair \uFFFDA\""); + } + @Test public void stringExtension_compileUnallowedFunction_throws() { CelCompiler celCompiler =