Skip to content

Commit

Permalink
unicode/utf16: add RuneLen
Browse files Browse the repository at this point in the history
RuneLen returns the number of 16-bit words required to encode a rune in UTF-16, or -1 if the rune is not a valid value to encode.

Fixes golang#44940
  • Loading branch information
bouk committed Mar 23, 2022
1 parent 1e34c00 commit 9dc4c95
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 0 deletions.
1 change: 1 addition & 0 deletions api/next/44940.txt
@@ -0,0 +1 @@
pkg unicode/utf16, func RuneLen(int32) int #44940
13 changes: 13 additions & 0 deletions src/unicode/utf16/utf16.go
Expand Up @@ -31,6 +31,19 @@ func IsSurrogate(r rune) bool {
return surr1 <= r && r < surr3
}

// RuneLen reports how many 16-bit words the UTF-16 encoding of r occupies:
// 1 or 2 for an encodable rune, and -1 when r cannot be encoded in UTF-16
// (a surrogate code point, a negative value, or a value above maxRune).
func RuneLen(r rune) int {
	// Runes below surrSelf that fall outside the surrogate range
	// [surr1, surr3) are stored directly in a single word.
	single := (0 <= r && r < surr1) || (surr3 <= r && r < surrSelf)
	if single {
		return 1
	}
	// Whatever is left is either a rune that needs a surrogate pair
	// or not encodable at all.
	if r < surrSelf || r > maxRune {
		return -1
	}
	return 2
}

// DecodeRune returns the UTF-16 decoding of a surrogate pair.
// If the pair is not a valid UTF-16 surrogate pair, DecodeRune returns
// the Unicode replacement code point U+FFFD.
Expand Down
25 changes: 25 additions & 0 deletions src/unicode/utf16/utf16_test.go
Expand Up @@ -21,6 +21,31 @@ func TestConstants(t *testing.T) {
}
}

// runeLenTest is one table entry for TestRuneLen: an input rune paired with
// the value RuneLen is expected to return for it.
type runeLenTest struct {
// r is the rune passed to RuneLen.
r rune
// size is the expected result; the table uses 1, 2, or -1.
size int
}

// runelentests lists runes together with the result RuneLen must return for
// each. Besides representative characters it pins the values on both sides
// of every range boundary, where an off-by-one comparison in RuneLen would
// otherwise go unnoticed.
var runelentests = []runeLenTest{
	// One 16-bit word: below the surrogate range.
	{0, 1},
	{'e', 1},
	{'é', 1},
	{0xD7FF, 1}, // last code point before the surrogates

	// Surrogate code points cannot be encoded.
	{0xD800, -1},
	{0xDFFF, -1},

	// One 16-bit word: above the surrogate range.
	{0xE000, 1}, // first code point after the surrogates
	{0xFFFF, 1}, // last code point that fits in one word

	// Two 16-bit words.
	{0x10000, 2}, // first code point that needs two words
	{'😂', 2},
	{MaxRune, 2},

	// Outside the valid rune range.
	{MaxRune + 1, -1},
	{-1, -1},
}

// TestRuneLen runs RuneLen over every entry of runelentests and reports each
// entry whose result differs from the expected size.
func TestRuneLen(t *testing.T) {
	for i := range runelentests {
		tc := runelentests[i]
		got := RuneLen(tc.r)
		if got == tc.size {
			continue
		}
		t.Errorf("RuneLen(%#U) = %d, want %d", tc.r, got, tc.size)
	}
}

type encodeTest struct {
in []rune
out []uint16
Expand Down

0 comments on commit 9dc4c95

Please sign in to comment.