-
Notifications
You must be signed in to change notification settings - Fork 18.7k
Description
Go version
go version go1.24.3 darwin/arm64
Output of go env in your module/workspace:
AR='ar'
CC='cc'
CGO_CFLAGS='-O2 -g'
CGO_CPPFLAGS=''
CGO_CXXFLAGS='-O2 -g'
CGO_ENABLED='1'
CGO_FFLAGS='-O2 -g'
CGO_LDFLAGS='-O2 -g'
CXX='c++'
GCCGO='gccgo'
GO111MODULE=''
GOARCH='arm64'
GOARM64='v8.0'
GOAUTH='netrc'
GOBIN=''
GOCACHE='/Users/evan.jones/Library/Caches/go-build'
GOCACHEPROG=''
GODEBUG=''
GOENV='/Users/evan.jones/Library/Application Support/go/env'
GOEXE=''
GOEXPERIMENT=''
GOFIPS140='off'
GOFLAGS=''
GOGCCFLAGS='-fPIC -arch arm64 -pthread -fno-caret-diagnostics -Qunused-arguments -fmessage-length=0 -ffile-prefix-map=/var/folders/pp/tvwz4y2x2qz97pf8bftqxhrw0000gp/T/go-build572495338=/tmp/go-build -gno-record-gcc-switches -fno-common'
GOHOSTARCH='arm64'
GOHOSTOS='darwin'
GOINSECURE=''
GOMOD='/Users/evan.jones/go_x_text_bug/go.mod'
GOMODCACHE='/Users/evan.jones/go/pkg/mod'
GOOS='darwin'
GOPATH='/Users/evan.jones/go'
GOROOT='/opt/homebrew/Cellar/go/1.24.3/libexec'
GOSUMDB='sum.golang.org'
GOTELEMETRY='on'
GOTELEMETRYDIR='/Users/evan.jones/Library/Application Support/go/telemetry'
GOTMPDIR=''
GOTOOLCHAIN='auto'
GOTOOLDIR='/opt/homebrew/Cellar/go/1.24.3/libexec/pkg/tool/darwin_arm64'
GOVCS=''
GOVERSION='go1.24.3'
GOWORK=''
PKG_CONFIG='pkg-config'
What did you do?
When using norm.Iter on some invalid UTF-8 byte sequences, Iter.Done always returns false. In these cases, Iter.Next returns an empty byte slice, so code that loops until the iterator is done enters an infinite loop.
What did you see happen?
An infinite loop: the iterator always returns Iter.Done() == false.
What did you expect to see?
I expected norm.Iter to be equivalent to executing norm.Form.String, followed by rune-by-rune iteration over the resulting string. In particular, I was changing code that was using for i, rune := range norm.NFC.String(input) { ... } to try to use norm.Iter instead. I had a fuzz test to compare the two implementations, and it found these invalid cases.
Here is a unit test that I expect to pass:
func TestNFCIterBug(t *testing.T) {
const maxIterations = 20
const badInput = "\xf0\xd9\x95"
nfcString := norm.NFC.String(badInput)
for i, b := range []byte(badInput) {
t.Logf("badInput byte i=%d b=0x%x", i, b)
}
for i, r := range nfcString {
t.Logf("nfcString rune i=%d r=0x%x", i, r)
}
iter := norm.Iter{}
iter.InitString(norm.NFC, nfcString)
i := 0
for !iter.Done() {
bytes := iter.Next()
t.Logf("norm Iter i=%d bytes=%#v", i, bytes)
i += 1
if i > maxIterations {
t.Fatalf("stopping after %d iterations to avoid infinite loop", maxIterations)
}
}
}The output of this test is the following:
=== RUN TestNFCBug
main_test.go:17: badInput byte i=0 b=0xf0
main_test.go:17: badInput byte i=1 b=0xd9
main_test.go:17: badInput byte i=2 b=0x95
main_test.go:21: nfcString rune i=0 r=0xfffd
main_test.go:21: nfcString rune i=1 r=0x655
main_test.go:29: norm Iter i=0 bytes=[]byte{}
main_test.go:29: norm Iter i=1 bytes=[]byte{}
main_test.go:29: norm Iter i=2 bytes=[]byte{}
main_test.go:29: norm Iter i=3 bytes=[]byte{}
main_test.go:29: norm Iter i=4 bytes=[]byte{}
main_test.go:29: norm Iter i=5 bytes=[]byte{}
main_test.go:29: norm Iter i=6 bytes=[]byte{}
main_test.go:29: norm Iter i=7 bytes=[]byte{}
main_test.go:29: norm Iter i=8 bytes=[]byte{}
main_test.go:29: norm Iter i=9 bytes=[]byte{}
main_test.go:29: norm Iter i=10 bytes=[]byte{}
main_test.go:29: norm Iter i=11 bytes=[]byte{}
main_test.go:29: norm Iter i=12 bytes=[]byte{}
main_test.go:29: norm Iter i=13 bytes=[]byte{}
main_test.go:29: norm Iter i=14 bytes=[]byte{}
main_test.go:29: norm Iter i=15 bytes=[]byte{}
main_test.go:29: norm Iter i=16 bytes=[]byte{}
main_test.go:29: norm Iter i=17 bytes=[]byte{}
main_test.go:29: norm Iter i=18 bytes=[]byte{}
main_test.go:29: norm Iter i=19 bytes=[]byte{}
main_test.go:29: norm Iter i=20 bytes=[]byte{}
main_test.go:32: stopping after 20 iterations to avoid infinite loop
I also have a fuzz test that I used to find this input that I am happy to contribute to the Go project if it is useful:
// Compare norm.NFC.Bytes to norm.Iter.
func FuzzNFCIterator(f *testing.F) {
f.Add("")
f.Add("ascii")
f.Add("e\u0301 decomposed")
f.Fuzz(func(t *testing.T, s string) {
// check UTF-8 valid strings only: no problems
// if !utf8.ValidString(s) {
// return
// }
normalized := string(norm.NFC.String(s))
runes := []rune(normalized)
iter := norm.Iter{}
iter.InitString(norm.NFC, normalized)
runeI := 0
for !iter.Done() {
runeBytes := iter.Next()
if len(runeBytes) == 0 {
t.Fatalf("iter.Next() returned empty byte slice for s=%#v %s",
s, strconv.QuoteToASCII(s))
}
for len(runeBytes) > 0 {
rune, runeLen := utf8.DecodeRune(runeBytes)
runeBytes = runeBytes[runeLen:]
if runes[runeI] != rune {
t.Fatalf("s=%#v %s: runes[runeI=%d]=0x%x iter returned 0x%x",
s, strconv.QuoteToASCII(s), runeI, runes[runeI], rune)
}
if rune == utf8.RuneError {
t.Fatalf("s=%#v %s: iter returned utf8.RuneError at runeI=%d",
s, strconv.QuoteToASCII(s), runeI)
}
runeI++
}
}
if runeI != len(runes) {
t.Fatalf("s=%#v %s: expected %d runes, got %d",
s, strconv.QuoteToASCII(s), len(runes), runeI)
}
})
}This code is available as a standalone git repository in case that is helpfu: https://github.com/evanj/go_x_text_bug