Skip to content

Commit a0af29c

Browse files
committed
fix: string slicing now correctly handles multi-byte characters
String slicing used byte indices on the underlying string while Len() returned the rune count, which corrupted slices of strings containing multi-byte characters (e.g. emojis). For example, "a😀b"[0:2] took bytes 0-1 (the "a" plus the first byte of the emoji) instead of runes 0-1 ("a😀"), producing garbled output. Added a Runes() method to RadString to centralize the rune conversion (it is now also used by IndexAt), which makes future caching straightforward.
1 parent 553bb2d commit a0af29c

File tree

2 files changed

+22
-3
lines changed

2 files changed

+22
-3
lines changed

core/testing/slice_string_test.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,18 @@ print(a[-2:-3])
9898
assertOnlyOutput(t, stdOutBuffer, "\n\n")
9999
assertNoErrors(t)
100100
}
101+
102+
func TestSlice_String_MultiByte(t *testing.T) {
103+
script := `
104+
a = "a😀b"
105+
print(a[0:2])
106+
print(a[1:3])
107+
print(a[0:1])
108+
print(a[1:2])
109+
print(a[2:3])
110+
print(a[:])
111+
`
112+
setupAndRunCode(t, script, "--color=never")
113+
assertOnlyOutput(t, stdOutBuffer, "a😀\n😀b\na\n😀\nb\na😀b\n")
114+
assertNoErrors(t)
115+
}

core/type_string.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,16 @@ func (s RadString) Len() int64 {
9797
return int64(com.StrLen(s.Plain()))
9898
}
9999

100+
func (s RadString) Runes() []rune {
101+
// todo: the rune slice is rebuilt on every call; it could be cached
102+
return []rune(s.Plain())
103+
}
104+
100105
func (s *RadString) Index(i *Interpreter, idxNode *ts.Node) RadString {
101106
if idxNode.Kind() == rl.K_SLICE {
102107
// todo should maintain attr info
103108
start, end := ResolveSliceStartEnd(i, idxNode, s.Len())
104-
return NewRadString(s.Plain()[start:end])
109+
return NewRadString(string(s.Runes()[start:end]))
105110
}
106111

107112
rawIdx := i.eval(idxNode).Val.RequireInt(i, idxNode)
@@ -119,8 +124,7 @@ func (s *RadString) IndexAt(idx int64) RadString {
119124
for _, segment := range s.Segments {
120125
nextSegmentLen := len(segment.String)
121126
if cumLen+nextSegmentLen > int(idx) {
122-
// rune array conversion required to handle multibyte characters e.g. emojis
123-
char := []rune(s.Plain())[idx] // todo inefficient, should just look up in segment
127+
char := s.Runes()[idx] // todo inefficient, should just look up in segment
124128
return newRadStringWithAttr(string(char), segment)
125129
}
126130
cumLen += +nextSegmentLen

0 commit comments

Comments
 (0)