Skip to content

Commit

Permalink
encoding/unicode: correctly handle single-byte UTF-16 inputs (and har…
Browse files Browse the repository at this point in the history
…den transform.String)

If a single byte is passed to a UTF-16 decoder
with atEOF set, it should not ask for more src
with ErrShortSrc but return an error. Also harden
transform.String not to enter an infinite loop if a
Transformer does return ErrShortSrc with atEOF true.

Fixes #39491
Fixes CVE-2020-14040

Change-Id: If8d2a9bca4eb9b4270c98a4967d356082043e17e
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/768667
Reviewed-by: Filippo Valsorda <valsorda@google.com>
Reviewed-on: https://go-review.googlesource.com/c/text/+/238238
Run-TryBot: Katie Hockman <katie@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
  • Loading branch information
katiehockman committed Jun 16, 2020
1 parent 3a82255 commit 23ae387
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 18 deletions.
11 changes: 4 additions & 7 deletions encoding/unicode/unicode.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,16 +368,13 @@ func (u *utf16Decoder) Reset() {
}

func (u *utf16Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if len(src) < 2 && atEOF && u.current.bomPolicy&requireBOM != 0 {
return 0, 0, ErrMissingBOM
}
if len(src) == 0 {
if atEOF && u.current.bomPolicy&requireBOM != 0 {
return 0, 0, ErrMissingBOM
}
return 0, 0, nil
}
if u.current.bomPolicy&acceptBOM != 0 {
if len(src) < 2 {
return 0, 0, transform.ErrShortSrc
}
if len(src) >= 2 && u.current.bomPolicy&acceptBOM != 0 {
switch {
case src[0] == 0xfe && src[1] == 0xff:
u.current.endianness = BigEndian
Expand Down
54 changes: 44 additions & 10 deletions encoding/unicode/unicode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,40 @@ func TestUTF16(t *testing.T) {
nSrc: 0,
err: ErrMissingBOM,
t: utf16BEEB.NewDecoder(),
}, {
desc: "utf-16 dec: Fail on single byte missing BOM when required",
src: "\x00",
sizeDst: 4,
t: utf16BEEB.NewDecoder(),
err: ErrMissingBOM,
}, {
desc: "utf-16 dec: Fail on short src missing BOM when required",
src: "\x00",
notEOF: true,
sizeDst: 4,
t: utf16BEEB.NewDecoder(),
err: transform.ErrShortSrc,
}, {
desc: "utf-16 dec: SHOULD interpret text as big-endian when BOM not present (RFC 2781:4.3)",
src: "\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
sizeDst: 100,
want: "\U00012345=Ra",
nSrc: 10,
t: utf16BEUB.NewDecoder(),
}, {
desc: "utf-16 dec: incorrect UTF-16: odd bytes",
src: "\x00",
sizeDst: 100,
want: "\uFFFD",
nSrc: 1,
t: utf16BEUB.NewDecoder(),
}, {
desc: "utf-16 dec: Fail on incorrect UTF-16: short source odd bytes",
src: "\x00",
notEOF: true,
sizeDst: 100,
t: utf16BEUB.NewDecoder(),
err: transform.ErrShortSrc,
}, {
// This is an error according to RFC 2781. But errors in RFC 2781 are
// open to interpretations, so I guess this is fine.
Expand Down Expand Up @@ -273,16 +300,23 @@ func TestUTF16(t *testing.T) {
t: utf16LEUB.NewDecoder(),
}}
for i, tc := range testCases {
b := make([]byte, tc.sizeDst)
nDst, nSrc, err := tc.t.Transform(b, []byte(tc.src), !tc.notEOF)
if err != tc.err {
t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
}
if got := string(b[:nDst]); got != tc.want {
t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
}
if nSrc != tc.nSrc {
t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
for j := 0; j < 2; j++ {
b := make([]byte, tc.sizeDst)
nDst, nSrc, err := tc.t.Transform(b, []byte(tc.src), !tc.notEOF)
if err != tc.err {
t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
}
if got := string(b[:nDst]); got != tc.want {
t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
}
if nSrc != tc.nSrc {
t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
}
// Since Transform is stateful, run failures again
// to ensure that the same error occurs a second time.
if err == nil {
break
}
}
}
}
Expand Down
6 changes: 5 additions & 1 deletion transform/transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,8 @@ func String(t Transformer, s string) (result string, n int, err error) {
// Transform the remaining input, growing dst and src buffers as necessary.
for {
n := copy(src, s[pSrc:])
nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], pSrc+n == len(s))
atEOF := pSrc+n == len(s)
nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], atEOF)
pDst += nDst
pSrc += nSrc

Expand All @@ -659,6 +660,9 @@ func String(t Transformer, s string) (result string, n int, err error) {
dst = grow(dst, pDst)
}
} else if err == ErrShortSrc {
if atEOF {
return string(dst[:pDst]), pSrc, err
}
if nSrc == 0 {
src = grow(src, 0)
}
Expand Down
23 changes: 23 additions & 0 deletions transform/transform_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1315,3 +1315,26 @@ var (
aaa = strings.Repeat("a", 4096)
AAA = strings.Repeat("A", 4096)
)

type badTransformer struct{}

func (bt badTransformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return 0, 0, ErrShortSrc
}

func (bt badTransformer) Reset() {}

func TestBadTransformer(t *testing.T) {
bt := badTransformer{}
if _, _, err := String(bt, "aaa"); err != ErrShortSrc {
t.Errorf("String expected ErrShortSrc, got nil")
}
if _, _, err := Bytes(bt, []byte("aaa")); err != ErrShortSrc {
t.Errorf("Bytes expected ErrShortSrc, got nil")
}
r := NewReader(bytes.NewReader([]byte("aaa")), bt)
var bytes []byte
if _, err := r.Read(bytes); err != ErrShortSrc {
t.Errorf("NewReader Read expected ErrShortSrc, got nil")
}
}

0 comments on commit 23ae387

Please sign in to comment.