Skip to content

Commit

Permalink
Merge pull request #22 from tdakkota/feat/faster-str-decode
Browse files — browse the repository at this point in the history
feat: unroll `skipStr` loop
  • Loading branch information
ernado committed Jan 16, 2022
2 parents 0493760 + c048666 commit dcb5603
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 88 deletions.
115 changes: 88 additions & 27 deletions dec_skip.go
Original file line number Diff line number Diff line change
Expand Up @@ -325,18 +325,70 @@ var (
//
// Assumes first quote was consumed.
func (d *Decoder) skipStr() error {
var (
c byte
i int
)
readStr:
for {
for i, c := range d.buf[d.head:d.tail] {
switch {
case c == '"':
d.head += i + 1
return nil
case c == '\\':
d.head += i + 1
goto readEscaped
case c < ' ':
return badToken(c)
i = 0
buf := d.buf[d.head:d.tail]
for len(buf) >= 8 {
c = buf[0]
if safeSet[c] != 0 {
goto readTok
}
i++

c = buf[1]
if safeSet[c] != 0 {
goto readTok
}
i++

c = buf[2]
if safeSet[c] != 0 {
goto readTok
}
i++

c = buf[3]
if safeSet[c] != 0 {
goto readTok
}
i++

c = buf[4]
if safeSet[c] != 0 {
goto readTok
}
i++

c = buf[5]
if safeSet[c] != 0 {
goto readTok
}
i++

c = buf[6]
if safeSet[c] != 0 {
goto readTok
}
i++

c = buf[7]
if safeSet[c] != 0 {
goto readTok
}
i++

buf = buf[8:]
}
var n int
for n, c = range buf {
if safeSet[c] != 0 {
i += n
goto readTok
}
}

Expand All @@ -348,25 +400,34 @@ readStr:
}
}

readEscaped:
v, err := d.byte()
if err != nil {
return err
}
switch escapedStrSet[v] {
case 1:
case 2:
for i := 0; i < 4; i++ {
h, err := d.byte()
if err != nil {
return err
}
if hexSet[h] == 0 {
return badToken(h)
readTok:
switch {
case c == '"':
d.head += i + 1
return nil
case c == '\\':
d.head += i + 1
v, err := d.byte()
if err != nil {
return err
}
switch escapedStrSet[v] {
case 1:
case 2:
for i := 0; i < 4; i++ {
h, err := d.byte()
if err != nil {
return err
}
if hexSet[h] == 0 {
return badToken(h)
}
}
default:
return badToken(v)
}
default:
return badToken(v)
case c < ' ':
return badToken(c)
}
goto readStr
}
Expand Down
99 changes: 53 additions & 46 deletions dec_skip_cases_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,52 +12,59 @@ import (
"github.com/stretchr/testify/require"
)

var testStrings = []string{
`""`, // valid
`"hello"`, // valid
`"`, // invalid
`"foo`, // invalid
`"\`, // invalid
`"\"`, // invalid
`"\u`, // invalid
`"\u1`, // invalid
`"\u12`, // invalid
`"\u123`, // invalid
`"\u\n"`, // invalid
`"\u1\n"`, // invalid
`"\u12\n"`, // invalid
`"\u12\n"`, // invalid
`"\u123\n"`, // invalid
`"\u1d`, // invalid
`"\u$`, // invalid
`"\21412`, // invalid
`"\uD834\1`, // invalid
`"\uD834\u3`, // invalid
`"\uD834\`, // invalid
`"\uD834`, // invalid
`"\u07F9`, // invalid
`"\u1234\n"`, // valid
`"\x00"`, // invalid
"\"\x00\"", // invalid
"\"\t\"", // invalid
"\"\\b\x06\"", // invalid
`"\t"`, // valid
`"\n"`, // valid
`"\r"`, // valid
`"\b"`, // valid
`"\f"`, // valid
`"\/"`, // valid
`"\\"`, // valid
"\"\\u000X\"", // invalid
"\"\\uxx0X\"", // invalid
"\"\\uxxxx\"", // invalid
"\"\\u000.\"", // invalid
"\"\\u0000\"", // valid
"\"\\ua123\"", // valid
"\"\\uffff\"", // valid
"\"\\ueeee\"", // valid
"\"\\uFFFF\"", // valid
}
// testStrings is the shared table of JSON string inputs for the string
// decoding tests. It is built from a hand-written list of edge cases followed
// by a generated list of one-character strings.
//
// The trailing "valid"/"invalid" remarks are informational only:
// TestDecoder_Str derives the expected outcome from json.Valid rather than
// from these comments.
var testStrings = append([]string{
`""`, // valid
`"hello"`, // valid
`"`, // invalid
`"foo`, // invalid
`"\`, // invalid
`"\"`, // invalid
`"\u`, // invalid
`"\u1`, // invalid
`"\u12`, // invalid
`"\u123`, // invalid
`"\u\n"`, // invalid
`"\u1\n"`, // invalid
`"\u12\n"`, // invalid
`"\u12\n"`, // invalid (duplicate of the previous entry)
`"\u123\n"`, // invalid
`"\u1d`, // invalid
`"\u$`, // invalid
`"\21412`, // invalid
`"\uD834\1`, // invalid
`"\uD834\u3`, // invalid
`"\uD834\`, // invalid
`"\uD834`, // invalid
`"\u07F9`, // invalid
`"\u1234\n"`, // valid
`"\x00"`, // invalid: raw string literal, so this is the two-character escape \x, which JSON does not define
"\"\x00\"", // invalid: interpreted literal, so this is a raw NUL byte inside the string
"\"\t\"", // invalid: raw tab byte inside the string
"\"\\b\x06\"", // invalid: valid \b escape followed by a raw control byte
`"\t"`, // valid
`"\n"`, // valid
`"\r"`, // valid
`"\b"`, // valid
`"\f"`, // valid
`"\/"`, // valid
`"\\"`, // valid
"\"\\u000X\"", // invalid
"\"\\uxx0X\"", // invalid
"\"\\uxxxx\"", // invalid
"\"\\u000.\"", // invalid
"\"\\u0000\"", // valid
"\"\\ua123\"", // valid
"\"\\uffff\"", // valid
"\"\\ueeee\"", // valid
"\"\\uFFFF\"", // valid
`"ab\n` + "\x00" + `"`, // invalid: raw NUL byte after a valid escape
}, func() (r []string) {
// Generate one quoted single-character string for every byte from 0x00
// up to and including ' ' (0x20). The control characters below ' ' must be
// rejected when unescaped; the space itself is allowed inside a JSON string,
// so the last generated case is a valid one.
for i := byte(0); i <= ' '; i++ {
r = append(r, `"`+string(i)+`"`)
}
return r
}()...)

var testObjs = []string{
"", // invalid
Expand Down
45 changes: 31 additions & 14 deletions dec_str_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"strings"
"testing"
"testing/iotest"
"unicode/utf8"

"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -35,8 +36,14 @@ func TestUnexpectedTokenErr_Error(t *testing.T) {
}

func TestDecoder_Str(t *testing.T) {
testStr := func(d *Decoder, valid bool) func(t *testing.T) {
testStr := func(d *Decoder, input string, valid bool) func(t *testing.T) {
return func(t *testing.T) {
t.Cleanup(func() {
if t.Failed() {
t.Logf("Input: %q", input)
}
})

_, err := d.Str()
if valid {
require.NoError(t, err)
Expand All @@ -45,19 +52,21 @@ func TestDecoder_Str(t *testing.T) {
}
}
}
for _, input := range testStrings {
for i, input := range testStrings {
valid := json.Valid([]byte(input))

t.Run("Buffer", testStr(DecodeStr(input), valid))
t.Run(fmt.Sprintf("Test%d", i), func(t *testing.T) {
t.Run("Buffer", testStr(DecodeStr(input), input, valid))

r := strings.NewReader(input)
d := Decode(r, 512)
t.Run("Reader", testStr(d, valid))
r := strings.NewReader(input)
d := Decode(r, 512)
t.Run("Reader", testStr(d, input, valid))

r.Reset(input)
obr := iotest.OneByteReader(r)
d.Reset(obr)
t.Run("OneByteReader", testStr(d, valid))
r.Reset(input)
obr := iotest.OneByteReader(r)
d.Reset(obr)
t.Run("OneByteReader", testStr(d, input, valid))
})
}
}

Expand Down Expand Up @@ -92,9 +101,17 @@ func benchmarkDecoderStrBytes(str string) func(b *testing.B) {
}

func BenchmarkDecoder_StrBytes(b *testing.B) {
for _, size := range []int{
1, 8, 16, 64, 128, 1024,
} {
b.Run(fmt.Sprintf("%db", size), benchmarkDecoderStrBytes(strings.Repeat("a", size)))
// runBench returns a benchmark function that runs one sub-benchmark per
// nominal size, each fed with char repeated size/runeCount(char) times.
//
// NOTE(review): the divisor is the rune count of char, not len(char). For a
// multi-byte unit such as "ф" the generated input therefore holds more bytes
// than the "%db" sub-benchmark name states — confirm whether byte length was
// intended before comparing results across units.
runBench := func(char string) func(b *testing.B) {
	// The rune count depends only on char, so compute it once up front.
	runes := utf8.RuneCountInString(char)
	return func(b *testing.B) {
		sizes := [...]int{2, 8, 16, 64, 128, 1024}
		for idx := range sizes {
			name := fmt.Sprintf("%db", sizes[idx])
			input := strings.Repeat(char, sizes[idx]/runes)
			b.Run(name, benchmarkDecoderStrBytes(input))
		}
	}
}

b.Run("Plain", runBench("a"))
b.Run("Escaped", runBench("ф"))
}
2 changes: 1 addition & 1 deletion w_str.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ var safeSet = [256]byte{
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
'"': 1,
'\\': 1,
}
Expand Down

0 comments on commit dcb5603

Please sign in to comment.