Skip to content

Commit

Permalink
encoding/traditionalchinese: make Big5 replace with FFFD on error
Browse files (browse the repository at this point in the history)
Updates golang/go#18898

Change-Id: If234aa5fdc35daf5ab02f49400462aa0c1ffa5ea
Reviewed-on: https://go-review.googlesource.com/37325
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
  • Loading branch information
mpvl committed Mar 1, 2017
1 parent 0ad425f commit d680ca3
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 20 deletions.
32 changes: 24 additions & 8 deletions encoding/traditionalchinese/all_test.go
Expand Up @@ -5,6 +5,7 @@
package traditionalchinese

import (
"fmt"
"io/ioutil"
"strings"
"testing"
Expand All @@ -28,22 +29,37 @@ func TestNonRepertoire(t *testing.T) {
e encoding.Encoding
src, want string
}{
{dec, Big5, "\x80", "\ufffd"},
{dec, Big5, "\x81", "\ufffd"},
{dec, Big5, "\x81\x30", "\ufffd\x30"},
{dec, Big5, "\x81\x40", "\ufffd"},
{dec, Big5, "\x81\xa0", "\ufffd"},
{dec, Big5, "\xff", "\ufffd"},

{enc, Big5, "갂", ""},
{enc, Big5, "a갂", "a"},
{enc, Big5, "\u43f0갂", "\x87@"},
}
for _, tc := range testCases {
dir, tr, wantErr := tc.init(tc.e)

dst, _, err := transform.String(tr, tc.src)
if err != wantErr {
t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, wantErr)
}
if got := string(dst); got != tc.want {
t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
}
t.Run(fmt.Sprintf("%s/%v/%q", dir, tc.e, tc.src), func(t *testing.T) {
dst := make([]byte, 100)
src := []byte(tc.src)
for i := 0; i <= len(tc.src); i++ {
nDst, nSrc, err := tr.Transform(dst, src[:i], false)
if err != nil && err != transform.ErrShortSrc && err != wantErr {
t.Fatalf("error on first call to Transform: %v", err)
}
n, _, err := tr.Transform(dst[nDst:], src[nSrc:], true)
nDst += n
if err != wantErr {
t.Fatalf("(%q|%q): got %v; want %v", tc.src[:i], tc.src[i:], err, wantErr)
}
if got := string(dst[:nDst]); got != tc.want {
t.Errorf("(%q|%q):\ngot %q\nwant %q", tc.src[:i], tc.src[i:], got, tc.want)
}
}
})
}
}

Expand Down
25 changes: 13 additions & 12 deletions encoding/traditionalchinese/big5.go
Expand Up @@ -5,7 +5,6 @@
package traditionalchinese

import (
"errors"
"unicode/utf8"

"golang.org/x/text/encoding"
Expand All @@ -26,8 +25,6 @@ var big5 = internal.Encoding{
identifier.Big5,
}

var errInvalidBig5 = errors.New("traditionalchinese: invalid Big5 encoding")

// big5Decoder is the receiver type for the Big5 decoding Transform method.
// It has no fields of its own; the embedded transform.NopResetter supplies
// a no-op Reset method.
type big5Decoder struct{ transform.NopResetter }

func (big5Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
Expand All @@ -40,18 +37,25 @@ loop:

case 0x81 <= c0 && c0 < 0xff:
if nSrc+1 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
err = transform.ErrShortSrc
break loop
}
r, size = utf8.RuneError, 1
goto write
}
c1 := src[nSrc+1]
switch {
case 0x40 <= c1 && c1 < 0x7f:
c1 -= 0x40
case 0xa1 <= c1 && c1 < 0xff:
c1 -= 0x62
case c1 < 0x40:
r, size = utf8.RuneError, 1
goto write
default:
err = errInvalidBig5
break loop
r, size = utf8.RuneError, 2
goto write
}
r, size = '\ufffd', 2
if i := int(c0-0x81)*157 + int(c1); i < len(decode) {
Expand Down Expand Up @@ -80,10 +84,10 @@ loop:
}

default:
err = errInvalidBig5
break loop
r, size = utf8.RuneError, 1
}

write:
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
Expand All @@ -99,9 +103,6 @@ loop:
nDst += copy(dst[nDst:], s)
continue loop
}
if atEOF && err == transform.ErrShortSrc {
err = errInvalidBig5
}
return nDst, nSrc, err
}

Expand Down

0 comments on commit d680ca3

Please sign in to comment.