diff --git a/bench_test.go b/bench_test.go index 3719510..9c695b8 100644 --- a/bench_test.go +++ b/bench_test.go @@ -12,8 +12,12 @@ import ( "github.com/go-faster/errors" ) -//go:embed testdata/file.json -var benchData []byte +var ( + //go:embed testdata/file.json + benchData []byte + //go:embed testdata/floats.json + floatsData []byte +) func Benchmark_large_file(b *testing.B) { b.Run("JX", func(b *testing.B) { @@ -163,37 +167,25 @@ func Benchmark_large_file(b *testing.B) { } func BenchmarkValid(b *testing.B) { - b.Run("JX", func(b *testing.B) { - b.ReportAllocs() - b.SetBytes(int64(len(benchData))) - var d Decoder - for n := 0; n < b.N; n++ { - d.ResetBytes(benchData) - if err := d.Validate(); err != nil { - b.Fatal(err) - } - } - }) - b.Run("Std", func(b *testing.B) { - b.ReportAllocs() - b.SetBytes(int64(len(benchData))) - - for n := 0; n < b.N; n++ { - if !json.Valid(benchData) { - b.Fatal("invalid") + bch := []struct { + name string + input []byte + }{ + {"Big", benchData}, + {"Floats", floatsData}, + } + for _, bench := range bch { + b.Run(bench.name, func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(len(bench.input))) + var d Decoder + for n := 0; n < b.N; n++ { + d.ResetBytes(bench.input) + if err := d.Validate(); err != nil { + b.Fatal(err) + } } - } - }) -} - -func Benchmark_std_large_file(b *testing.B) { - b.ReportAllocs() - for n := 0; n < b.N; n++ { - var result []struct{} - err := json.Unmarshal(benchData, &result) - if err != nil { - b.Error(err) - } + }) } } diff --git a/dec_skip.go b/dec_skip.go index 8efbc80..03d9556 100644 --- a/dec_skip.go +++ b/dec_skip.go @@ -1,6 +1,8 @@ package jx import ( + "io" + "github.com/go-faster/errors" ) @@ -50,11 +52,8 @@ func (d *Decoder) Skip() error { return d.skipThreeBytes('r', 'u', 'e') // true case 'f': return d.skipFourBytes('a', 'l', 's', 'e') // false - case '0': + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': d.unread() - _, err := d.Float32() - return err - case '-', '1', '2', '3', '4', '5', '6', '7', '8', '9': return d.skipNumber() case '[': if err := d.skipArr(); err != nil { @@ -97,51 +96,199 @@ func (d *Decoder) skipThreeBytes(b1, b2, b3 byte) error { return nil } -func (d *Decoder) skipNumber() error { - ok, err := d.skipNumberFast() - if err != nil || ok { - return err - } - d.unread() - if _, err := d.Float64(); err != nil { - return err +var ( + skipNumberSet = [256]byte{ + '0': 1, + '1': 1, + '2': 1, + '3': 1, + '4': 1, + '5': 1, + '6': 1, + '7': 1, + '8': 1, + '9': 1, + + ',': 2, + ']': 2, + '}': 2, + ' ': 2, + '\t': 2, + '\n': 2, + '\r': 2, } - return nil -} +) -func (d *Decoder) skipNumberFast() (ok bool, err error) { - dotFound := false - for i := d.head; i < d.tail; i++ { - c := d.buf[i] +// skipNumber reads one JSON number. +// +// Assumes d.buf is not empty. +func (d *Decoder) skipNumber() error { + const ( + digitTag byte = 1 + closerTag byte = 2 + ) + c := d.buf[d.head] + d.head++ + switch c { + case '-': + c, err := d.byte() + if err != nil { + return err + } + // Character after '-' must be a digit. + if skipNumberSet[c] != digitTag { + return badToken(c) + } + if c != '0' { + break + } + fallthrough + case '0': + // If buffer is empty, try to read more. + if d.head == d.tail { + err := d.read() + if err != nil { + // There is no data anymore. + if err == io.EOF { + return nil + } + return err + } + } + + c = d.buf[d.head] + if skipNumberSet[c] == closerTag { + return nil + } switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': case '.': - if dotFound { - return false, errors.New("more than one dot") - } - if i+1 == d.tail { - return false, nil + goto stateDot + case 'e', 'E': + goto stateExp + default: + return badToken(c) + } + } + for { + for i, c := range d.buf[d.head:d.tail] { + switch skipNumberSet[c] { + case closerTag: + d.head += i + return nil + case digitTag: + continue } - c = d.buf[i+1] + switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + case '.': + d.head += i + goto stateDot + case 'e', 'E': + d.head += i + goto stateExp default: - return false, errors.New("no digit after dot") + return badToken(c) } - dotFound = true - default: - switch c { - case ',', ']', '}', ' ', '\t', '\n', '\r': - if d.head == i { - return false, nil // if - without following digits + } + + if err := d.read(); err != nil { + // There is no data anymore. + if err == io.EOF { + d.head = d.tail + return nil + } + return err + } + } + +stateDot: + d.head++ + { + var last byte = '.' + for { + for i, c := range d.buf[d.head:d.tail] { + switch skipNumberSet[c] { + case closerTag: + d.head += i + // Check that dot is not last character. + if last == '.' { + return io.ErrUnexpectedEOF + } + return nil + case digitTag: + last = c + continue + } + + switch c { + case 'e', 'E': + if last == '.' { + return badToken(c) + } + d.head += i + goto stateExp + default: + return badToken(c) } - d.head = i - return true, nil } - return false, nil + + if err := d.read(); err != nil { + // There is no data anymore. + if err == io.EOF { + d.head = d.tail + // Check that dot is not last character. + if last == '.' { + return io.ErrUnexpectedEOF + } + return nil + } + return err + } + } + } +stateExp: + d.head++ + // There must be a number or sign after e. + { + numOrSign, err := d.byte() + if err != nil { + return err + } + if skipNumberSet[numOrSign] != digitTag { // If next character is not a digit, check for sign. + if numOrSign == '-' || numOrSign == '+' { + num, err := d.byte() + if err != nil { + return err + } + // There must be a number after sign. + if skipNumberSet[num] != digitTag { + return badToken(num) + } + } else { + return badToken(numOrSign) + } + } + } + for { + for i, c := range d.buf[d.head:d.tail] { + if skipNumberSet[c] == closerTag { + d.head += i + return nil + } + if skipNumberSet[c] == 0 { + return badToken(c) + } + } + + if err := d.read(); err != nil { + // There is no data anymore. + if err == io.EOF { + d.head = d.tail + return nil + } + return err } } - return false, nil } func (d *Decoder) skipStr() error { diff --git a/dec_skip_bench_test.go b/dec_skip_bench_test.go index 5a106c3..c5f4085 100644 --- a/dec_skip_bench_test.go +++ b/dec_skip_bench_test.go @@ -1,7 +1,6 @@ package jx import ( - "encoding/json" "testing" ) @@ -9,7 +8,7 @@ type TestResp struct { Code uint64 } -func Benchmark_skip(b *testing.B) { +func BenchmarkSkip(b *testing.B) { input := []byte(` { "_shards":{ @@ -51,34 +50,3 @@ func Benchmark_skip(b *testing.B) { } } } - -func Benchmark_std_skip(b *testing.B) { - input := []byte(` -{ - "_shards":{ - "total" : 5, - "successful" : 5, - "failed" : 0 - }, - "hits":{ - "total" : 1, - "hits" : [ - { - "_index" : "twitter", - "_type" : "tweet", - "_id" : "1", - "_source" : { - "user" : "kimchy", - "postDate" : "2009-11-15T14:12:12", - "message" : "trying out Elasticsearch" - } - } - ] - }, - "code": 200 -}`) - for n := 0; n < b.N; n++ { - result := TestResp{} - _ = json.Unmarshal(input, &result) - } -} diff --git a/dec_skip_cases_test.go b/dec_skip_cases_test.go index 988a8f5..bff381c 100644 --- a/dec_skip_cases_test.go +++ b/dec_skip_cases_test.go @@ -2,14 +2,17 @@ package jx import ( "encoding/json" + "fmt" "io" "reflect" + "strings" "testing" + "testing/iotest" "github.com/stretchr/testify/require" ) -func Test_skip(t *testing.T) { +func TestSkip(t *testing.T) { type testCase struct { ptr interface{} inputs []string @@ -29,7 +32,61 @@ func Test_skip(t *testing.T) { `"\t"`, // valid }, }) - testCases = append(testCases, testCase{ + numberCase := testCase{ + ptr: (*float64)(nil), + inputs: []string{ + "0", // valid + "-", // invalid + "+", // invalid + "-1", // valid + "+1", // invalid + "-a", // invalid + "-0", // valid + "-00", // invalid + "-01", // invalid + "-\x00", // invalid, zero byte + "0.1", // valid + "0e1", // valid + "0e+1", // valid + "0e-1", // valid + "0e-11", // valid + "0e-1a", // invalid + "1.e1", // invalid + "0e-1+", // invalid + "0e", // invalid + "e", // invalid + "-e", // invalid + "+e", // invalid + ".e", // invalid + "e.", // invalid + "0.e", // invalid + "0-e", // invalid + "0e-", // invalid + "0e+", // invalid + "0.0e", // invalid + "0.0e1", // valid + "0.0e+", // invalid + "0.0e-", // invalid + "0e0+0", // invalid + "0.e0+0", // invalid + "0.0e+0", // valid + "0.0e+1", // valid + "0.0e0+0", // invalid + "0..1", // invalid, more dot + "1e+1", // valid + "1+1", // invalid + "1E1", // valid, e or E + "1ee1", // invalid + "100a", // invalid + "10.", // invalid + "-0.12", // valid + "0]", // invalid + "0e]", // invalid + "0e+]", // invalid + }, + } + testCases = append(testCases, numberCase) + arrayCase := testCase{ ptr: (*[]interface{})(nil), inputs: []string{ `[]`, // valid @@ -39,23 +96,11 @@ func Test_skip(t *testing.T) { `[`, // invalid `[[]`, // invalid }, - }) - testCases = append(testCases, testCase{ - ptr: (*float64)(nil), - inputs: []string{ - "+1", // invalid - "-a", // invalid - "-\x00", // invalid, zero byte - "0.1", // valid - "0..1", // invalid, more dot - "1e+1", // valid - "1+1", // invalid - "1E1", // valid, e or E - "1ee1", // invalid - "100a", // invalid - "10.", // invalid - }, - }) + } + for _, c := range numberCase.inputs { + arrayCase.inputs = append(arrayCase.inputs, `[`+c+`]`) + } + testCases = append(testCases, arrayCase) testCases = append(testCases, testCase{ ptr: (*struct{})(nil), inputs: []string{ @@ -71,29 +116,51 @@ func Test_skip(t *testing.T) { `{abc}`, // invalid }, }) - for _, testCase := range testCases { - valType := reflect.TypeOf(testCase.ptr).Elem() - for _, input := range testCase.inputs { - t.Run(input, func(t *testing.T) { - should := require.New(t) - ptrVal := reflect.New(valType) - stdErr := json.Unmarshal([]byte(input), ptrVal.Interface()) - iter := DecodeStr(input) - if stdErr == nil { - should.NoError(iter.Skip()) - should.ErrorIs(iter.Null(), io.ErrUnexpectedEOF) - } else { - should.Error(func() error { - if err := iter.Skip(); err != nil { - return err - } - if err := iter.Skip(); err != io.EOF { - return err - } - return nil - }()) + + testDecode := func(iter *Decoder, input string, stdErr error) func(t *testing.T) { + return func(t *testing.T) { + t.Cleanup(func() { + if t.Failed() { + t.Logf("Input: %q", input) } }) + + should := require.New(t) + if stdErr == nil { + should.NoError(iter.Skip()) + should.ErrorIs(iter.Null(), io.ErrUnexpectedEOF) + } else { + should.Error(func() error { + if err := iter.Skip(); err != nil { + return err + } + if err := iter.Skip(); err != io.EOF { + return err + } + return nil + }()) + } } } + for _, testCase := range testCases { + valType := reflect.TypeOf(testCase.ptr).Elem() + t.Run(valType.Kind().String(), func(t *testing.T) { + for inputIdx, input := range testCase.inputs { + t.Run(fmt.Sprintf("Test%d", inputIdx), func(t *testing.T) { + ptrVal := reflect.New(valType) + stdErr := json.Unmarshal([]byte(input), ptrVal.Interface()) + + t.Run("Buffer", testDecode(DecodeStr(input), input, stdErr)) + + r := strings.NewReader(input) + d := Decode(r, 512) + t.Run("Reader", testDecode(d, input, stdErr)) + + r.Reset(input) + obr := iotest.OneByteReader(r) + t.Run("OneByteReader", testDecode(Decode(obr, 512), input, stdErr)) + }) + } + }) + } } diff --git a/dec_skip_test.go b/dec_skip_test.go index 947ea2b..6a55090 100644 --- a/dec_skip_test.go +++ b/dec_skip_test.go @@ -6,17 +6,22 @@ import ( "github.com/stretchr/testify/require" ) -func Test_skip_number_in_array(t *testing.T) { +func TestSkip_number_in_array(t *testing.T) { + var err error + a := require.New(t) d := DecodeStr(`[-0.12, "stream"]`) - d.Elem() - d.Skip() - d.Elem() + _, err = d.Elem() + a.NoError(err) + err = d.Skip() + a.NoError(err) + _, err = d.Elem() + a.NoError(err) if s, _ := d.Str(); s != "stream" { t.FailNow() } } -func Test_skip_string_in_array(t *testing.T) { +func TestSkip_string_in_array(t *testing.T) { d := DecodeStr(`["hello", "stream"]`) d.Elem() d.Skip() @@ -26,7 +31,7 @@ func Test_skip_string_in_array(t *testing.T) { } } -func Test_skip_null(t *testing.T) { +func TestSkip_null(t *testing.T) { d := DecodeStr(`[null , "stream"]`) d.Elem() d.Skip() @@ -36,7 +41,7 @@ func Test_skip_null(t *testing.T) { } } -func Test_skip_true(t *testing.T) { +func TestSkip_true(t *testing.T) { d := DecodeStr(`[true , "stream"]`) d.Elem() d.Skip() @@ -46,7 +51,7 @@ func Test_skip_true(t *testing.T) { } } -func Test_skip_false(t *testing.T) { +func TestSkip_false(t *testing.T) { d := DecodeStr(`[false , "stream"]`) d.Elem() d.Skip() @@ -56,7 +61,7 @@ func Test_skip_false(t *testing.T) { } } -func Test_skip_array(t *testing.T) { +func TestSkip_array(t *testing.T) { d := DecodeStr(`[[1, [2, [3], 4]], "stream"]`) d.Elem() d.Skip() @@ -66,7 +71,7 @@ func Test_skip_array(t *testing.T) { } } -func Test_skip_empty_array(t *testing.T) { +func TestSkip_empty_array(t *testing.T) { d := DecodeStr(`[ [ ], "stream"]`) d.Elem() d.Skip() @@ -76,7 +81,7 @@ func Test_skip_empty_array(t *testing.T) { } } -func Test_skip_nested(t *testing.T) { +func TestSkip_nested(t *testing.T) { d := DecodeStr(`[ {"a" : [{"stream": "c"}], "d": 102 }, "stream"]`) if _, err := d.Elem(); err != nil { t.Fatal(err) @@ -90,7 +95,7 @@ func Test_skip_nested(t *testing.T) { require.Equal(t, "stream", s) } -func Test_skip_simple_nested(t *testing.T) { +func TestSkip_simple_nested(t *testing.T) { d := DecodeStr(`["foo", "bar", "baz"]`) require.NoError(t, d.Skip()) } diff --git a/dec_str.go b/dec_str.go index 8d911a7..3660991 100644 --- a/dec_str.go +++ b/dec_str.go @@ -130,6 +130,9 @@ func (d *Decoder) strSlow(v value) (value, error) { return v, errors.Wrap(err, "escape") } default: + if c < ' ' { + return value{}, badToken(c) + } v = v.byte(c) } } diff --git a/fuzz_test.go b/fuzz_test.go index ee0c618..9c9859a 100644 --- a/fuzz_test.go +++ b/fuzz_test.go @@ -5,6 +5,7 @@ package jx import ( "bytes" + "encoding/json" "testing" "github.com/go-faster/errors" @@ -22,7 +23,13 @@ func FuzzValid(f *testing.F) { f.Add([]byte(s)) } f.Fuzz(func(t *testing.T, data []byte) { - Valid(data) + var ( + std = json.Valid(data) + jx = Valid(data) + ) + if std != jx { + t.Fatalf(`Valid(%#v): %v (std) != %v (jx)`, string(data), std, jx) + } }) } diff --git a/testdata/floats.json b/testdata/floats.json new file mode 100644 index 0000000..981c45b --- /dev/null +++ b/testdata/floats.json @@ -0,0 +1,102 @@ +[ + 0.6046602879796196, + 0.9405090880450124, + 0.6645600532184904, + 0.4377141871869802, + 0.4246374970712657, + 0.6868230728671094, + 0.06563701921747622, + 0.15651925473279124, + 0.09696951891448456, + 0.30091186058528707, + 0.5152126285020654, + 0.8136399609900968, + 0.21426387258237492, + 0.380657189299686, + 0.31805817433032985, + 0.4688898449024232, + 0.28303415118044517, + 0.29310185733681576, + 0.6790846759202163, + 0.21855305259276428, + 0.20318687664732285, + 0.360871416856906, + 0.5706732760710226, + 0.8624914374478864, + 0.29311424455385804, + 0.29708256355629153, + 0.7525730355516119, + 0.2065826619136986, + 0.865335013001561, + 0.6967191657466347, + 0.5238203060500009, + 0.028303083325889995, + 0.15832827774512764, + 0.6072534395455154, + 0.9752416188605784, + 0.07945362337387198, + 0.5948085976830626, + 0.05912065131387529, + 0.692024587353112, + 0.30152268100656, + 0.17326623818270528, + 0.5410998550087353, + 0.544155573000885, + 0.27850762181610883, + 0.4231522015718281, + 0.5305857153507052, + 0.2535405005150605, + 0.28208099496492467, + 0.7886049150193449, + 0.3618054804803169, + 0.8805431227416171, + 0.2971122606397708, + 0.8943617293304537, + 0.09745461839911657, + 0.9769168685862624, + 0.07429099894984302, + 0.22228941700678773, + 0.6810783123925709, + 0.24151508854715265, + 0.31152244431052484, + 0.932846428518434, + 0.741848959991823, + 0.8010550426526613, + 0.7302314772948083, + 0.18292491645390843, + 0.4283570818068078, + 0.8969919575618727, + 0.6826534880132438, + 0.9789293555766876, + 0.9222122589217269, + 0.09083727535388708, + 0.4931419977048804, + 0.9269868035744142, + 0.9549454404167818, + 0.3479539636282229, + 0.6908388315056789, + 0.7109071952999951, + 0.5637795958152644, + 0.6494894605929404, + 0.5517650490127749, + 0.7558235074915978, + 0.40380328579570035, + 0.13065111702897217, + 0.9859647293402467, + 0.8963417453962161, + 0.3220839705208817, + 0.7211477651926741, + 0.6445397825093294, + 0.08552050754191123, + 0.6695752976997745, + 0.6227283173637045, + 0.3696928436398219, + 0.2368225468054852, + 0.5352818906344061, + 0.18724610140105305, + 0.2388407028053186, + 0.6280981712183633, + 0.1267529293726013, + 0.28133029380535923, + 0.41032284435628247 +] diff --git a/testdata/fuzz/FuzzValid/607400f9a48295c9f1dd240bf6419d172a24fc4085b4b954dae35ffa6447d64a b/testdata/fuzz/FuzzValid/607400f9a48295c9f1dd240bf6419d172a24fc4085b4b954dae35ffa6447d64a new file mode 100644 index 0000000..b52344a --- /dev/null +++ b/testdata/fuzz/FuzzValid/607400f9a48295c9f1dd240bf6419d172a24fc4085b4b954dae35ffa6447d64a @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("0E0+0")