Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(0.24.0) Fix String equalsIgnoreCase() and regionMatches() implementations #11497

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 85 additions & 42 deletions jcl/src/java.base/share/classes/java/lang/String.java
Original file line number Diff line number Diff line change
Expand Up @@ -1454,29 +1454,45 @@ public int compareTo(String string) {
return s1len - s2len;
}

private int compareValue(int codepoint) {
/**
 * Maps a code point to the value used when comparing characters without regard to case.
 *
 * @param codepoint the code point to normalize
 * @return the lower case form of the upper case form of {@code codepoint}
 */
private static int compareValue(int codepoint) {
	// Fast path: letters 'A'..'Z' are shifted straight to 'a'..'z'.
	boolean isUpperAsciiLetter = (codepoint >= 'A') && (codepoint <= 'Z');

	return isUpperAsciiLetter
		? codepoint + ('a' - 'A')
		: Character.toLowerCase(Character.toUpperCase(codepoint));
}

private char compareValue(char c) {
/**
 * Maps a char to the value used when comparing characters without regard to case.
 *
 * @param c the char to normalize
 * @return the lower case form of the upper case form of {@code c}
 */
private static char compareValue(char c) {
	if ((c < 'A') || (c > 'Z')) {
		// General path: upper-case first, then lower-case the result.
		return Character.toLowerCase(Character.toUpperCase(c));
	}

	// Letters 'A'..'Z' only need the fixed offset to reach 'a'..'z'.
	return (char) (c + ('a' - 'A'));
}

private char compareValue(byte b) {
/**
 * Maps a byte to the char value used when comparing characters without regard to case.
 *
 * @param b the byte to normalize
 * @return the lower case form of the upper case form of the char obtained from
 *         {@code helpers.byteToCharUnsigned(b)}
 */
private static char compareValue(byte b) {
	// The range test is done on the raw (signed) byte, exactly as before.
	boolean isUpperAsciiLetter = (b >= 'A') && (b <= 'Z');
	char widened = helpers.byteToCharUnsigned(b);

	return isUpperAsciiLetter
		? (char) (widened + ('a' - 'A'))
		: Character.toLowerCase(Character.toUpperCase(widened));
}

/**
 * Decides whether two chars are equal when case is ignored.
 *
 * @param c1 the first char
 * @param c2 the second char
 * @return {@code true} if the chars match after case conversion, {@code false} otherwise
 */
private static boolean charValuesEqualIgnoreCase(char c1, char c2) {
	char upper1 = (char) toUpperCase(c1);
	char upper2 = (char) toUpperCase(c2);

	// When at least one char is at or below 255, matching upper case forms settle the answer.
	if ((upper1 == upper2) && ((c1 <= 255) || (c2 <= 255))) {
		return true;
	}

	// Otherwise the upper case forms are converted to lower case and compared.
	return toLowerCase(upper1) == toLowerCase(upper2);
}

/**
* Compare the receiver to the specified String to determine the relative ordering when the case of the characters is ignored.
*
Expand Down Expand Up @@ -1789,34 +1805,39 @@ public boolean equalsIgnoreCase(String string) {
byte[] s2Value = s2.value;

if (enableCompression && (null == compressionFlag || (s1.coder | s2.coder) == LATIN1)) {
// Compare the last chars. Under string compression, the compressible char set obeys 1-1 mapping for upper/lower
// case, converting to lower cases then compare should be sufficient.
// Compare the last chars.
// In order to tell 2 chars are different:
// Under string compression, the compressible char set obeys 1-1 mapping for upper/lower case,
// converting to upper cases then compare should be sufficient.
byte byteAtO1Last = helpers.getByteFromArrayByIndex(s1Value, s1len - 1);
byte byteAtO2Last = helpers.getByteFromArrayByIndex(s2Value, s1len - 1);

if (byteAtO1Last != byteAtO2Last &&
toUpperCase(helpers.byteToCharUnsigned(byteAtO1Last)) != toUpperCase(helpers.byteToCharUnsigned(byteAtO2Last))) {
if ((byteAtO1Last != byteAtO2Last)
&& (toUpperCase(helpers.byteToCharUnsigned(byteAtO1Last)) != toUpperCase(helpers.byteToCharUnsigned(byteAtO2Last)))
) {
return false;
}

while (o1 < end - 1) {
byte byteAtO1 = helpers.getByteFromArrayByIndex(s1Value, o1++);
byte byteAtO2 = helpers.getByteFromArrayByIndex(s2Value, o2++);

if (byteAtO1 != byteAtO2 &&
toUpperCase(helpers.byteToCharUnsigned(byteAtO1)) != toUpperCase(helpers.byteToCharUnsigned(byteAtO2))) {
if ((byteAtO1 != byteAtO2)
&& (toUpperCase(helpers.byteToCharUnsigned(byteAtO1)) != toUpperCase(helpers.byteToCharUnsigned(byteAtO2)))
) {
return false;
}
}
} else {
// Compare the last chars. Under string compression, the compressible char set obeys 1-1 mapping for upper/lower
// case, converting to lower cases then compare should be sufficient.
// Compare the last chars.
// In order to tell 2 chars are different:
// If at least one char is ASCII, converting to upper cases then compare should be sufficient.
// If both chars are not in ASCII char set, need to convert to lower case and compare as well.
char charAtO1Last = s1.charAtInternal(s1len - 1, s1Value);
char charAtO2Last = s2.charAtInternal(s1len - 1, s2Value);

if (charAtO1Last != charAtO2Last
&& toUpperCase(charAtO1Last) != toUpperCase(charAtO2Last)
&& ((charAtO1Last <= 255 && charAtO2Last <= 255) || Character.toLowerCase(charAtO1Last) != Character.toLowerCase(charAtO2Last))
if ((charAtO1Last != charAtO2Last)
&& !charValuesEqualIgnoreCase(charAtO1Last, charAtO2Last)
/*[IF JAVA_SPEC_VERSION >= 16]*/
&& (!Character.isLowSurrogate(charAtO1Last) || !Character.isLowSurrogate(charAtO2Last))
/*[ENDIF] JAVA_SPEC_VERSION >= 16 */
Expand All @@ -1836,17 +1857,19 @@ && toUpperCase(charAtO1Last) != toUpperCase(charAtO2Last)
if (Character.isHighSurrogate(charAtO1) && Character.isHighSurrogate(charAtO2) && (o1 < end)) {
int codepointAtO1 = Character.toCodePoint(charAtO1, s1.charAtInternal(o1++, s1Value));
int codepointAtO2 = Character.toCodePoint(charAtO2, s2.charAtInternal(o2++, s2Value));
if ((codepointAtO1 != codepointAtO2) && (compareValue(codepointAtO1) != compareValue(codepointAtO2))) {
if ((codepointAtO1 != codepointAtO2)
&& (compareValue(codepointAtO1) != compareValue(codepointAtO2))
) {
return false;
} else {
continue;
}
}
/*[ENDIF] JAVA_SPEC_VERSION >= 16 */

if (charAtO1 != charAtO2 &&
toUpperCase(charAtO1) != toUpperCase(charAtO2) &&
((charAtO1 <= 255 && charAtO2 <= 255) || Character.toLowerCase(charAtO1) != Character.toLowerCase(charAtO2))) {
if ((charAtO1 != charAtO2)
&& (!charValuesEqualIgnoreCase(charAtO1, charAtO2))
) {
return false;
}
}
Expand Down Expand Up @@ -2470,9 +2493,9 @@ public boolean regionMatches(boolean ignoreCase, int thisStart, String string, i
byte byteAtO1 = helpers.getByteFromArrayByIndex(s1Value, o1++);
byte byteAtO2 = helpers.getByteFromArrayByIndex(s2Value, o2++);

if (byteAtO1 != byteAtO2 &&
toUpperCase(helpers.byteToCharUnsigned(byteAtO1)) != toUpperCase(helpers.byteToCharUnsigned(byteAtO2)) &&
toLowerCase(helpers.byteToCharUnsigned(byteAtO1)) != toLowerCase(helpers.byteToCharUnsigned(byteAtO2))) {
if ((byteAtO1 != byteAtO2)
&& (!charValuesEqualIgnoreCase(helpers.byteToCharUnsigned(byteAtO1), helpers.byteToCharUnsigned(byteAtO2)))
) {
return false;
}
}
Expand All @@ -2485,15 +2508,17 @@ public boolean regionMatches(boolean ignoreCase, int thisStart, String string, i
if (Character.isHighSurrogate(charAtO1) && Character.isHighSurrogate(charAtO2) && (o1 < end)) {
int codepointAtO1 = Character.toCodePoint(charAtO1, s1.charAtInternal(o1++, s1Value));
int codepointAtO2 = Character.toCodePoint(charAtO2, s2.charAtInternal(o2++, s2Value));
if ((codepointAtO1 != codepointAtO2) && (compareValue(codepointAtO1) != compareValue(codepointAtO2))) {
if ((codepointAtO1 != codepointAtO2)
&& (compareValue(codepointAtO1) != compareValue(codepointAtO2))
) {
return false;
}
}
/*[ENDIF] JAVA_SPEC_VERSION >= 16 */

if (charAtO1 != charAtO2 &&
toUpperCase(charAtO1) != toUpperCase(charAtO2) &&
toLowerCase(charAtO1) != toLowerCase(charAtO2)) {
if ((charAtO1 != charAtO2)
&& (!charValuesEqualIgnoreCase(charAtO1, charAtO2))
) {
return false;
}
}
Expand Down Expand Up @@ -5497,6 +5522,22 @@ private static char compareValue(byte b) {
return Character.toLowerCase(Character.toUpperCase(helpers.byteToCharUnsigned(b)));
}

/**
 * Decides whether two chars are equal when case is ignored.
 *
 * @param c1 the first char
 * @param c2 the second char
 * @return {@code true} if the chars match after case conversion, {@code false} otherwise
 */
private static boolean charValuesEqualIgnoreCase(char c1, char c2) {
	char upper1 = (char) toUpperCase(c1);
	char upper2 = (char) toUpperCase(c2);

	// When at least one char is at or below 255, matching upper case forms settle the answer.
	if ((upper1 == upper2) && ((c1 <= 255) || (c2 <= 255))) {
		return true;
	}

	// Otherwise the upper case forms are converted to lower case and compared.
	return toLowerCase(upper1) == toLowerCase(upper2);
}

/**
* Compare the receiver to the specified String to determine the relative ordering when the case of the characters is ignored.
*
Expand Down Expand Up @@ -5807,21 +5848,23 @@ public boolean equalsIgnoreCase(String string) {
// Compare the last chars.
// In order to tell 2 chars are different:
// Under string compression, the compressible char set obeys 1-1 mapping for upper/lower case,
// converting to lower cases then compare should be sufficient.
// converting to upper cases then compare should be sufficient.
byte byteAtO1Last = helpers.getByteFromArrayByIndex(s1Value, s1len - 1);
byte byteAtO2Last = helpers.getByteFromArrayByIndex(s2Value, s1len - 1);

if (byteAtO1Last != byteAtO2Last
&& toUpperCase(helpers.byteToCharUnsigned(byteAtO1Last)) != toUpperCase(helpers.byteToCharUnsigned(byteAtO2Last))) {
if ((byteAtO1Last != byteAtO2Last)
&& (toUpperCase(helpers.byteToCharUnsigned(byteAtO1Last)) != toUpperCase(helpers.byteToCharUnsigned(byteAtO2Last)))
) {
return false;
}

while (o1 < end - 1) {
byte byteAtO1 = helpers.getByteFromArrayByIndex(s1Value, o1++);
byte byteAtO2 = helpers.getByteFromArrayByIndex(s2Value, o2++);

if (byteAtO1 != byteAtO2
&& toUpperCase(helpers.byteToCharUnsigned(byteAtO1)) != toUpperCase(helpers.byteToCharUnsigned(byteAtO2))) {
if ((byteAtO1 != byteAtO2)
&& (toUpperCase(helpers.byteToCharUnsigned(byteAtO1)) != toUpperCase(helpers.byteToCharUnsigned(byteAtO2)))
) {
return false;
}
}
Expand All @@ -5833,19 +5876,19 @@ && toUpperCase(helpers.byteToCharUnsigned(byteAtO1)) != toUpperCase(helpers.byte
char charAtO1Last = s1.charAtInternal(s1len - 1, s1Value);
char charAtO2Last = s2.charAtInternal(s1len - 1, s2Value);

if (charAtO1Last != charAtO2Last
&& toUpperCase(charAtO1Last) != toUpperCase(charAtO2Last)
&& ((charAtO1Last <= 255 && charAtO2Last <= 255) || Character.toLowerCase(charAtO1Last) != Character.toLowerCase(charAtO2Last))) {
if ((charAtO1Last != charAtO2Last)
&& (!charValuesEqualIgnoreCase(charAtO1Last, charAtO2Last))
) {
return false;
}

while (o1 < end - 1) {
char charAtO1 = s1.charAtInternal(o1++, s1Value);
char charAtO2 = s2.charAtInternal(o2++, s2Value);

if (charAtO1 != charAtO2
&& toUpperCase(charAtO1) != toUpperCase(charAtO2)
&& ((charAtO1 <= 255 && charAtO2 <= 255) || Character.toLowerCase(charAtO1) != Character.toLowerCase(charAtO2))) {
if ((charAtO1 != charAtO2)
&& (!charValuesEqualIgnoreCase(charAtO1, charAtO2))
) {
return false;
}
}
Expand Down Expand Up @@ -6561,9 +6604,9 @@ public boolean regionMatches(boolean ignoreCase, int thisStart, String string, i
byte byteAtO1 = helpers.getByteFromArrayByIndex(s1Value, o1++);
byte byteAtO2 = helpers.getByteFromArrayByIndex(s2Value, o2++);

if (byteAtO1 != byteAtO2
&& toUpperCase(helpers.byteToCharUnsigned(byteAtO1)) != toUpperCase(helpers.byteToCharUnsigned(byteAtO2))
&& toLowerCase(helpers.byteToCharUnsigned(byteAtO1)) != toLowerCase(helpers.byteToCharUnsigned(byteAtO2))) {
if ((byteAtO1 != byteAtO2)
&& (!charValuesEqualIgnoreCase(helpers.byteToCharUnsigned(byteAtO1), helpers.byteToCharUnsigned(byteAtO2)))
) {
return false;
}
}
Expand All @@ -6572,9 +6615,9 @@ && toLowerCase(helpers.byteToCharUnsigned(byteAtO1)) != toLowerCase(helpers.byte
char charAtO1 = s1.charAtInternal(o1++, s1Value);
char charAtO2 = s2.charAtInternal(o2++, s2Value);

if (charAtO1 != charAtO2
&& toUpperCase(charAtO1) != toUpperCase(charAtO2)
&& toLowerCase(charAtO1) != toLowerCase(charAtO2)) {
if ((charAtO1 != charAtO2)
&& (!charValuesEqualIgnoreCase(charAtO1, charAtO2))
) {
return false;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,14 @@ public void test_compareToIgnoreCase() {
AssertJUnit.assertTrue("0xbf should not compare = to 'ss'", "\u00df".compareToIgnoreCase("ss") != 0);
AssertJUnit.assertTrue("0x130 should compare = to 'i'", "\u0130".compareToIgnoreCase("i") == 0);
AssertJUnit.assertTrue("0x131 should compare = to 'i'", "\u0131".compareToIgnoreCase("i") == 0);

AssertJUnit.assertTrue("Turkish 'ı' at beginning of string returned incorrect value for first = second",
"\u0131\u0130j".compareToIgnoreCase("\u0069\u0049J") == 0);
AssertJUnit.assertTrue("Turkish 'ı' in middle of string returned incorrect value for first = second",
"J\u0131j".compareToIgnoreCase("j\u0130J") == 0);
AssertJUnit.assertTrue("Turkish 'ı' at end of string returned incorrect value for first = second",
"j\u0131".compareToIgnoreCase("J\u0130") == 0);

if (VersionCheck.major() >= 16) {
AssertJUnit.assertTrue("DESERET CAPITAL LETTER LONG I should compare == to DESERET SMALL LETTER LONG I",
"\ud801\udc00".compareToIgnoreCase("\ud801\udc28") == 0);
Expand Down Expand Up @@ -585,6 +593,14 @@ public void test_equals() {
@Test
public void test_equalsIgnoreCase() {
AssertJUnit.assertTrue("lc version returned unequal to uc", hwlc.equalsIgnoreCase(hwuc));

AssertJUnit.assertTrue("Turkish 'ı' at beginning of string lc version returned unequal to uc",
"\u0131\u0130j".equalsIgnoreCase("\u0069\u0049J"));
AssertJUnit.assertTrue("Turkish 'ı' in middle of string lc version returned unequal to uc",
"J\u0131j".equalsIgnoreCase("j\u0130J"));
AssertJUnit.assertTrue("Turkish 'ı' at end of string lc version returned unequal to uc",
"j\u0131".equalsIgnoreCase("J\u0130"));

if (VersionCheck.major() >= 16) {
AssertJUnit.assertTrue("DESERET CAPITAL LETTER LONG I returned unequal to DESERET SMALL LETTER LONG I",
"\ud801\udc00".equalsIgnoreCase("\ud801\udc28"));
Expand Down Expand Up @@ -1043,6 +1059,14 @@ public void test_regionMatches2() {
AssertJUnit.assertTrue("Different regions returned true", !hw1.regionMatches(true, 2, bogusString, 2, 5));
AssertJUnit.assertTrue("identical regions failed comparison with different cases",
hw1.regionMatches(false, 2, hw2, 2, 5));

AssertJUnit.assertTrue("Turkish 'ı' at beginning of string failed comparison with different cases",
"\u0131\u0130j".regionMatches(true, 0, "\u0069\u0049J", 0, 3));
AssertJUnit.assertTrue("Turkish 'ı' in middle of string failed comparison with different cases",
"J\u0131j".regionMatches(true, 0, "j\u0130J", 0, 3));
AssertJUnit.assertTrue("Turkish 'ı' at end of string failed comparison with different cases",
"jJ\u0131".regionMatches(true, 0, "Jj\u0130", 0, 3));

if (VersionCheck.major() >= 16) {
AssertJUnit.assertTrue("DESERET CAPITAL LETTER LONG I and DESERET SMALL LETTER LONG I should match when case insensitive",
"\ud801\udc00".regionMatches(true, 0, "\ud801\udc28", 0 ,2));
Expand Down