Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: rewrite skipNumber #12

Merged
merged 11 commits into from
Jan 13, 2022
56 changes: 24 additions & 32 deletions bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,12 @@ import (
"github.com/go-faster/errors"
)

//go:embed testdata/file.json
var benchData []byte
// Embedded JSON fixtures used as benchmark inputs.
var (
// benchData holds the contents of testdata/file.json.
//go:embed testdata/file.json
benchData []byte
// floatsData holds the contents of testdata/floats.json.
//go:embed testdata/floats.json
floatsData []byte
)

func Benchmark_large_file(b *testing.B) {
b.Run("JX", func(b *testing.B) {
Expand Down Expand Up @@ -163,37 +167,25 @@ func Benchmark_large_file(b *testing.B) {
}

func BenchmarkValid(b *testing.B) {
b.Run("JX", func(b *testing.B) {
b.ReportAllocs()
b.SetBytes(int64(len(benchData)))
var d Decoder
for n := 0; n < b.N; n++ {
d.ResetBytes(benchData)
if err := d.Validate(); err != nil {
b.Fatal(err)
}
}
})
b.Run("Std", func(b *testing.B) {
b.ReportAllocs()
b.SetBytes(int64(len(benchData)))

for n := 0; n < b.N; n++ {
if !json.Valid(benchData) {
b.Fatal("invalid")
bch := []struct {
name string
input []byte
}{
{"Big", benchData},
{"Floats", floatsData},
}
for _, bench := range bch {
b.Run(bench.name, func(b *testing.B) {
b.ReportAllocs()
b.SetBytes(int64(len(bench.input)))
var d Decoder
for n := 0; n < b.N; n++ {
d.ResetBytes(bench.input)
if err := d.Validate(); err != nil {
b.Fatal(err)
}
}
}
})
}

func Benchmark_std_large_file(b *testing.B) {
b.ReportAllocs()
for n := 0; n < b.N; n++ {
var result []struct{}
err := json.Unmarshal(benchData, &result)
if err != nil {
b.Error(err)
}
})
}
}

Expand Down
221 changes: 184 additions & 37 deletions dec_skip.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package jx

import (
"io"

"github.com/go-faster/errors"
)

Expand Down Expand Up @@ -50,11 +52,8 @@ func (d *Decoder) Skip() error {
return d.skipThreeBytes('r', 'u', 'e') // true
case 'f':
return d.skipFourBytes('a', 'l', 's', 'e') // false
case '0':
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
d.unread()
_, err := d.Float32()
return err
case '-', '1', '2', '3', '4', '5', '6', '7', '8', '9':
return d.skipNumber()
case '[':
if err := d.skipArr(); err != nil {
Expand Down Expand Up @@ -97,51 +96,199 @@ func (d *Decoder) skipThreeBytes(b1, b2, b3 byte) error {
return nil
}

func (d *Decoder) skipNumber() error {
ok, err := d.skipNumberFast()
if err != nil || ok {
return err
}
d.unread()
if _, err := d.Float64(); err != nil {
return err
var (
// skipNumberSet is a byte-indexed lookup table used by skipNumber to
// classify the next input byte without a chain of comparisons:
//
//   - 1 marks an ASCII digit ('0'..'9'); skipNumber names this digitTag.
//   - 2 marks a byte that ends a number: ',', ']', '}', or one of the
//     whitespace bytes ' ', '\t', '\n', '\r'; skipNumber names this closerTag.
//   - 0 (the zero value of every unlisted entry) marks any other byte.
skipNumberSet = [256]byte{
'0': 1,
'1': 1,
'2': 1,
'3': 1,
'4': 1,
'5': 1,
'6': 1,
'7': 1,
'8': 1,
'9': 1,

',': 2,
']': 2,
'}': 2,
' ': 2,
'\t': 2,
'\n': 2,
'\r': 2,
}
// NOTE(review): the next two lines look like leftovers of the removed
// skipNumber body in this diff rendering rather than part of the table
// declaration — confirm against the real dec_skip.go.
return nil
}
)

func (d *Decoder) skipNumberFast() (ok bool, err error) {
dotFound := false
for i := d.head; i < d.tail; i++ {
c := d.buf[i]
// skipNumber reads one JSON number.
//
// Assumes d.buf is not empty.
func (d *Decoder) skipNumber() error {
const (
digitTag byte = 1
closerTag byte = 2
)
c := d.buf[d.head]
d.head++
switch c {
case '-':
c, err := d.byte()
if err != nil {
return err
}
// Character after '-' must be a digit.
if skipNumberSet[c] != digitTag {
return badToken(c)
}
if c != '0' {
break
}
fallthrough
case '0':
// If buffer is empty, try to read more.
if d.head == d.tail {
err := d.read()
if err != nil {
// There is no data anymore.
if err == io.EOF {
return nil
}
return err
}
}

c = d.buf[d.head]
if skipNumberSet[c] == closerTag {
return nil
}
switch c {
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
case '.':
if dotFound {
return false, errors.New("more than one dot")
}
if i+1 == d.tail {
return false, nil
goto stateDot
case 'e', 'E':
goto stateExp
default:
return badToken(c)
}
}
for {
for i, c := range d.buf[d.head:d.tail] {
switch skipNumberSet[c] {
case closerTag:
d.head += i
return nil
case digitTag:
continue
}
c = d.buf[i+1]

switch c {
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
case '.':
d.head += i
goto stateDot
case 'e', 'E':
d.head += i
goto stateExp
default:
return false, errors.New("no digit after dot")
return badToken(c)
}
dotFound = true
default:
switch c {
case ',', ']', '}', ' ', '\t', '\n', '\r':
if d.head == i {
return false, nil // if - without following digits
}

if err := d.read(); err != nil {
// There is no data anymore.
if err == io.EOF {
d.head = d.tail
return nil
}
return err
}
}

stateDot:
d.head++
{
var last byte = '.'
for {
for i, c := range d.buf[d.head:d.tail] {
switch skipNumberSet[c] {
case closerTag:
d.head += i
// Check that dot is not last character.
if last == '.' {
return io.ErrUnexpectedEOF
}
return nil
case digitTag:
last = c
continue
}

switch c {
case 'e', 'E':
if last == '.' {
return badToken(c)
}
d.head += i
goto stateExp
default:
return badToken(c)
}
d.head = i
return true, nil
}
return false, nil

if err := d.read(); err != nil {
// There is no data anymore.
if err == io.EOF {
d.head = d.tail
// Check that dot is not last character.
if last == '.' {
return io.ErrUnexpectedEOF
}
return nil
}
return err
}
}
}
stateExp:
d.head++
// There must be a number or sign after e.
{
numOrSign, err := d.byte()
if err != nil {
return err
}
if skipNumberSet[numOrSign] != digitTag { // If next character is not a digit, check for sign.
if numOrSign == '-' || numOrSign == '+' {
num, err := d.byte()
if err != nil {
return err
}
// There must be a number after sign.
if skipNumberSet[num] != digitTag {
return badToken(num)
}
} else {
return badToken(numOrSign)
}
}
}
for {
for i, c := range d.buf[d.head:d.tail] {
if skipNumberSet[c] == closerTag {
d.head += i
return nil
}
if skipNumberSet[c] == 0 {
return badToken(c)
}
}

if err := d.read(); err != nil {
// There is no data anymore.
if err == io.EOF {
d.head = d.tail
return nil
}
return err
}
}
return false, nil
}

func (d *Decoder) skipStr() error {
Expand Down
34 changes: 1 addition & 33 deletions dec_skip_bench_test.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
package jx

import (
"encoding/json"
"testing"
)

// TestResp is the decode target for the skip benchmark document.
// It declares a single field, Code; Benchmark_std_skip passes a pointer to
// it to json.Unmarshal.
type TestResp struct {
Code uint64
}

func Benchmark_skip(b *testing.B) {
func BenchmarkSkip(b *testing.B) {
input := []byte(`
{
"_shards":{
Expand Down Expand Up @@ -51,34 +50,3 @@ func Benchmark_skip(b *testing.B) {
}
}
}

// Benchmark_std_skip measures encoding/json unmarshalling the sample
// document below into a TestResp on every iteration.
func Benchmark_std_skip(b *testing.B) {
// Sample document: the "_shards" and "hits" objects come first and the
// top-level "code" key comes last.
input := []byte(`
{
"_shards":{
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits":{
"total" : 1,
"hits" : [
{
"_index" : "twitter",
"_type" : "tweet",
"_id" : "1",
"_source" : {
"user" : "kimchy",
"postDate" : "2009-11-15T14:12:12",
"message" : "trying out Elasticsearch"
}
}
]
},
"code": 200
}`)
for n := 0; n < b.N; n++ {
// A fresh target is created for each iteration.
result := TestResp{}
// The error returned by json.Unmarshal is discarded.
_ = json.Unmarshal(input, &result)
}
}