Skip to content

Commit

Permalink
Fix String.offsetByCodePoints for unpaired surrogates (scala-native#3471
Browse files Browse the repository at this point in the history
)

Previously, String.offsetByCodePoints failed with an error
when the range it traversed ended with an unpaired high surrogate.

This commit fixes the bounds check in offsetByCodePoints so that it no longer
looks for a low surrogate at the next index when that index is past the end of the range.
  • Loading branch information
tanishiking committed Sep 6, 2023
1 parent 3dd595b commit e51e4e7
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 2 deletions.
4 changes: 2 additions & 2 deletions javalib/src/main/scala/java/lang/Character.scala
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ object Character {
}
if (isHighSurrogate(seq(i))) {
val next = i + 1
if (next <= end && isLowSurrogate(seq(next))) {
if (next < end && isLowSurrogate(seq(next))) {
i += 1
}
}
Expand All @@ -396,7 +396,7 @@ object Character {
}
if (isLowSurrogate(seq(i))) {
val prev = i - 1
if (prev >= start && isHighSurrogate(seq(prev))) {
if (prev > start && isHighSurrogate(seq(prev))) {
i -= 1
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,32 @@ class StringTest {
)
}

/** Checks `String.offsetByCodePoints` on BMP-only text and on a single
 *  well-formed surrogate pair, moving both forwards and backwards.
 */
@Test def offsetByCodePoints(): Unit = {
  val ascii = "abc"
  // One supplementary code point, stored as a high/low surrogate pair (two chars).
  val pair = "\uD800\uDC00"

  // Each row: (input, start index, code point offset, expected resulting index).
  val cases = Seq(
    (ascii, 0, 3, 3),
    (ascii, 1, 2, 3),
    (ascii, 3, -3, 0),
    (ascii, 3, -2, 1),
    // Stepping forward over the pair consumes both chars.
    (pair, 0, 1, 2),
    // Starting between the two halves, one step back lands on index 0.
    (pair, 1, -1, 0)
  )

  cases.foreach {
    case (input, index, codePointOffset, expected) =>
      assertTrue(input.offsetByCodePoints(index, codePointOffset) == expected)
  }
}

/** Checks that an unpaired surrogate is counted as exactly one code point by
 *  `String.offsetByCodePoints`, whether it is the whole string or sits next
 *  to an ordinary char.
 */
@Test def offsetByCodePointsUnpairedSurrogates(): Unit = {
  // First and last values of the high-surrogate and low-surrogate ranges,
  // each as a one-char string with no partner.
  val loneSurrogates = Seq("\uD800", "\uDBFF", "\uDC00", "\uDFFF")

  // Forward by one code point from the start: ends at index 1.
  loneSurrogates.foreach { s =>
    assertTrue(s.offsetByCodePoints(0, 1) == 1)
  }

  // Backward by one code point from the end: ends at index 0.
  loneSurrogates.foreach { s =>
    assertTrue(s.offsetByCodePoints(1, -1) == 0)
  }

  // A lone high surrogate followed by, or preceded by, a non-surrogate char.
  val highThenLetter = "\uD800x"
  val letterThenHigh = "x\uD800"
  assertTrue(highThenLetter.offsetByCodePoints(0, 2) == 2)
  assertTrue(letterThenHigh.offsetByCodePoints(0, 2) == 2)
}

@Test def compareTo(): Unit = {
assertTrue("test".compareTo("utest") < 0)
assertTrue("test".compareTo("test") == 0)
Expand Down

0 comments on commit e51e4e7

Please sign in to comment.