Skip to content

Commit

Permalink
encoding/japanese: make ShiftJIS replace with FFFD on error
Browse files Browse the repository at this point in the history
Updates golang/go#18898

Change-Id: I049e5ba1fca9529eeacc3aa58f7e5c2d17f22ecd
Reviewed-on: https://go-review.googlesource.com/37317
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
  • Loading branch information
mpvl committed Feb 26, 2017
1 parent 4687d73 commit 0ad425f
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 16 deletions.
16 changes: 16 additions & 0 deletions encoding/japanese/all_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,22 @@ func TestNonRepertoire(t *testing.T) {
{dec, EUCJP, strings.Repeat("\x8f\xa0", n), strings.Repeat("\ufffd", 2*n)},
{dec, EUCJP, "a" + strings.Repeat("\x8f\xa1", n), "a" + strings.Repeat("\ufffd", n)},
{dec, EUCJP, "a" + strings.Repeat("\x8f\xa1\xff", n), "a" + strings.Repeat("\ufffd", 2*n)},

// Continue correctly after errors
{dec, ShiftJIS, "\x80", "\u0080"}, // It's what the spec says.
{dec, ShiftJIS, "\x81", "\ufffd"},
{dec, ShiftJIS, "\xe0", "\ufffd"},
{dec, ShiftJIS, "\xe0\x39", "\ufffd\u0039"},
{dec, ShiftJIS, "\x81\x7f", "\ufffd\u007f"},
{dec, ShiftJIS, "\xe0\xfd", "\ufffd"},
{dec, ShiftJIS, "\xe0\x9f", "燹"},
{dec, ShiftJIS, "\xfc\xfc", "\ufffd"},
{dec, ShiftJIS, "\xfc\xfd", "\ufffd"},
{dec, ShiftJIS, "\xfdaa", "\ufffdaa"},

{dec, ShiftJIS, strings.Repeat("\x81\x81", n), strings.Repeat("=", n)},
{dec, ShiftJIS, strings.Repeat("\xe0\xfd", n), strings.Repeat("\ufffd", n)},
{dec, ShiftJIS, "a" + strings.Repeat("\xe0\xfd", n), "a" + strings.Repeat("\ufffd", n)},
}
for _, tc := range testCases {
dir, tr, wantErr := tc.init(tc.e)
Expand Down
32 changes: 16 additions & 16 deletions encoding/japanese/shiftjis.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
package japanese

import (
"errors"
"unicode/utf8"

"golang.org/x/text/encoding"
Expand All @@ -24,8 +23,6 @@ var shiftJIS = internal.Encoding{
identifier.ShiftJIS,
}

var errInvalidShiftJIS = errors.New("japanese: invalid Shift JIS encoding")

type shiftJISDecoder struct{ transform.NopResetter }

func (shiftJISDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
Expand All @@ -49,27 +46,31 @@ loop:

if nSrc+1 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
break loop
}
r, size = '\ufffd', 1
goto write
}
c1 := src[nSrc+1]
switch {
case c1 < 0x40:
err = errInvalidShiftJIS
break loop
r, size = '\ufffd', 1 // c1 is ASCII so output on next round
goto write
case c1 < 0x7f:
c0--
c1 -= 0x40
case c1 == 0x7f:
err = errInvalidShiftJIS
break loop
r, size = '\ufffd', 1 // c1 is ASCII so output on next round
goto write
case c1 < 0x9f:
c0--
c1 -= 0x41
case c1 < 0xfd:
c1 -= 0x9f
default:
err = errInvalidShiftJIS
break loop
r, size = '\ufffd', 2
goto write
}
r, size = '\ufffd', 2
if i := int(c0)*94 + int(c1); i < len(jis0208Decode) {
Expand All @@ -79,20 +80,19 @@ loop:
}
}

case c0 == 0x80:
r, size = 0x80, 1

default:
err = errInvalidShiftJIS
break loop
r, size = '\ufffd', 1
}

write:
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
if atEOF && err == transform.ErrShortSrc {
err = errInvalidShiftJIS
}
return nDst, nSrc, err
}

Expand Down

0 comments on commit 0ad425f

Please sign in to comment.