Skip to content

Commit

Permalink
[WASM] Add more complete handling toLowerCase/toUpperCase for non-str…
Browse files Browse the repository at this point in the history
…ingref.

This is closer to the original Android-based implementation, but delegates to JS in difficult cases instead of ICU4C.

PiperOrigin-RevId: 519809176
  • Loading branch information
gkdn authored and Copybara-Service committed Mar 27, 2023
1 parent 5752855 commit 24a7b79
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 2 deletions.
13 changes: 13 additions & 0 deletions jre/java/super-wasm-alt/java/lang/CaseMapper.java
Expand Up @@ -27,6 +27,9 @@ class CaseMapper {
"\u000b\u0000\f\u0000\r"
+ "\u0000\u000e\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f!\"#$%&'()*+,-./0123456789:;<=>\u0000\u0000?@A\u0000BC\u0000\u0000\u0000\u0000D\u0000\u0000\u0000\u0000\u0000EFG\u0000HI\u0000\u0000\u0000\u0000J\u0000\u0000\u0000\u0000\u0000KL\u0000\u0000MN\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000OPQ\u0000RS\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000TUV\u0000WX\u0000\u0000\u0000\u0000Y";

// U+0130 (capital I with dot above). toLowerCase() does not map this character itself; it hands
// the whole string to the JS implementation when it encounters one.
private static final char LATIN_CAPITAL_I_WITH_DOT = '\u0130';
// U+03A3 (Greek capital sigma). Also punted to JS by toLowerCase().
// NOTE(review): presumably because its lower-case form depends on position in the word — the
// tests expect U+03C2 at the end of a word and U+03C3 elsewhere.
private static final char GREEK_CAPITAL_SIGMA = '\u03a3';

/*
* Our current GC makes short-lived objects more expensive than we'd like. When that's fixed, this
* class should be changed so that you instantiate it with the String and its value, offset, and
Expand All @@ -44,6 +47,12 @@ public static String toLowerCase(String s, char[] value, int offset, int count)
int newCount = 0;
for (int i = offset, end = offset + count; i < end; ++i) {
char ch = value[i];
if (ch == LATIN_CAPITAL_I_WITH_DOT
|| ch == GREEK_CAPITAL_SIGMA
|| Character.isHighSurrogate(ch)) {
// Punt these hard cases.
return String.fromJsString(s.toJsString().toLowerCase());
}
char newCh = charToLowerCase(ch);
if (newValue == null && ch != newCh) {
newValue = new char[count]; // The result can't be longer than the input.
Expand Down Expand Up @@ -107,6 +116,10 @@ public static String toUpperCase(String s, char[] value, int offset, int count)
int i = 0;
for (int o = offset, end = offset + count; o < end; o++) {
char ch = value[o];
if (Character.isHighSurrogate(ch)) {
// Punt these hard cases.
return String.fromJsString(s.toJsString().toUpperCase());
}
int index = upperIndex(ch);
if (index == -1) {
if (output != null && i >= output.length) {
Expand Down
4 changes: 4 additions & 0 deletions jre/java/super-wasm-alt/java/lang/String.java
Expand Up @@ -1009,6 +1009,10 @@ public static class NativeString {

native NativeString replace(WasmExtern regex, NativeString replace);

// Case conversion on the native string.
// NOTE(review): presumably bound to the JS String methods of the same name, and presumably what
// CaseMapper reaches via s.toJsString() for its hard cases — confirm against the interop setup.
native NativeString toLowerCase();

native NativeString toUpperCase();

// NOTE(review): presumably the locale-sensitive counterparts of the two methods above — confirm.
native NativeString toLocaleLowerCase();

native NativeString toLocaleUpperCase();
Expand Down
15 changes: 13 additions & 2 deletions jre/javatests/com/google/j2cl/jre/java/lang/StringTest.java
Expand Up @@ -738,7 +738,12 @@ public void testLowerCase() {
}

/** Verifies lower-casing of non-ASCII text: the "Turkey Test" strings and Greek capital sigma. */
public void testLowerCaseNonAscii() {
  // "Turkey Test": first without, then with, a leading dotted capital I.
  String turkishLower = hideFromCompiler("ÖÇŞĞÜ").toLowerCase();
  assertEquals("öçşğü", turkishLower);
  String dottedTurkishLower = hideFromCompiler("İÖÇŞĞÜ").toLowerCase();
  assertEquals("i̇öçşğü", dottedTurkishLower);

  // Greek capital sigma: on its own, at the end of a word, and in the middle of a word.
  String loneSigma = hideFromCompiler("\u03A3").toLowerCase();
  assertEquals("\u03C3", loneSigma);
  String trailingSigma = hideFromCompiler("ABC\u03A3").toLowerCase();
  assertEquals("abc\u03C2", trailingSigma);
  String innerSigma = hideFromCompiler("ABC\u03A3ABC").toLowerCase();
  assertEquals("abc\u03C3abc", innerSigma);
}

public void testMatch() {
Expand Down Expand Up @@ -1064,7 +1069,13 @@ public void testUpperCase() {
}

/**
 * Verifies upper-casing of non-ASCII text: the "Turkey Test" strings, characters whose upper-case
 * form is spelled with more than one letter, and a surrogate pair.
 */
public void testUpperCaseNonAscii() {
  // "Turkey Test": first without, then with, a leading dotted i.
  String turkishUpper = hideFromCompiler("öçşğü").toUpperCase();
  assertEquals("ÖÇŞĞÜ", turkishUpper);
  String dottedTurkishUpper = hideFromCompiler("i̇öçşğü").toUpperCase();
  assertEquals("İÖÇŞĞÜ", dottedTurkishUpper);

  // Sharp s becomes two letters; the apostrophe-n form keeps its apostrophe.
  String sharpSUpper = hideFromCompiler("ß").toUpperCase();
  assertEquals("SS", sharpSUpper);
  String apostropheNUpper = hideFromCompiler("ʼn").toUpperCase();
  assertEquals("ʼN", apostropheNUpper);

  // A supplementary character encoded as a surrogate pair.
  String surrogateUpper = hideFromCompiler("\uD801\uDC44").toUpperCase();
  assertEquals("\uD801\uDC1c", surrogateUpper);
}

/*
Expand Down

0 comments on commit 24a7b79

Please sign in to comment.