From f7517311532e57338abdaa3edbd85d9c257b1e5e Mon Sep 17 00:00:00 2001 From: tdakkota Date: Wed, 12 Jan 2022 16:34:03 +0300 Subject: [PATCH 01/11] test: add more skipNumber tests --- dec_skip_cases_test.go | 89 +++++++++++++++++++++++++++--------------- dec_skip_test.go | 29 ++++++++------ 2 files changed, 74 insertions(+), 44 deletions(-) diff --git a/dec_skip_cases_test.go b/dec_skip_cases_test.go index 988a8f5..71b391d 100644 --- a/dec_skip_cases_test.go +++ b/dec_skip_cases_test.go @@ -2,6 +2,7 @@ package jx import ( "encoding/json" + "fmt" "io" "reflect" "testing" @@ -12,6 +13,7 @@ import ( func Test_skip(t *testing.T) { type testCase struct { ptr interface{} + name string inputs []string } var testCases []testCase @@ -43,17 +45,33 @@ func Test_skip(t *testing.T) { testCases = append(testCases, testCase{ ptr: (*float64)(nil), inputs: []string{ - "+1", // invalid - "-a", // invalid - "-\x00", // invalid, zero byte - "0.1", // valid - "0..1", // invalid, more dot - "1e+1", // valid - "1+1", // invalid - "1E1", // valid, e or E - "1ee1", // invalid - "100a", // invalid - "10.", // invalid + "0", // valid + "+1", // invalid + "-a", // invalid + "-\x00", // invalid, zero byte + "0.1", // valid + "0e1", // valid + "0e+1", // valid + "0e-1", // valid + "0e", // invalid + "-e", // invalid + "+e", // invalid + ".e", // invalid + "0.e", // invalid + "0.e", // invalid + "0.0e", // invalid + "0.0e1", // valid + "0.0e+1", // valid + "0.0e+", // invalid + "0.0e-", // invalid + "0..1", // invalid, more dot + "1e+1", // valid + "1+1", // invalid + "1E1", // valid, e or E + "1ee1", // invalid + "100a", // invalid + "10.", // invalid + "-0.12", // valid }, }) testCases = append(testCases, testCase{ @@ -73,27 +91,34 @@ func Test_skip(t *testing.T) { }) for _, testCase := range testCases { valType := reflect.TypeOf(testCase.ptr).Elem() - for _, input := range testCase.inputs { - t.Run(input, func(t *testing.T) { - should := require.New(t) - ptrVal := reflect.New(valType) - stdErr := json.Unmarshal([]byte(input), ptrVal.Interface()) - iter := DecodeStr(input) - if stdErr == nil { - should.NoError(iter.Skip()) - should.ErrorIs(iter.Null(), io.ErrUnexpectedEOF) - } else { - should.Error(func() error { - if err := iter.Skip(); err != nil { - return err + t.Run(valType.Kind().String(), func(t *testing.T) { + for inputIdx, input := range testCase.inputs { + t.Run(fmt.Sprintf("Test%d", inputIdx), func(t *testing.T) { + t.Cleanup(func() { + if t.Failed() { + t.Logf("Input: %q", input) } - if err := iter.Skip(); err != io.EOF { - return err - } - return nil - }()) - } - }) - } + }) + should := require.New(t) + ptrVal := reflect.New(valType) + stdErr := json.Unmarshal([]byte(input), ptrVal.Interface()) + iter := DecodeStr(input) + if stdErr == nil { + should.NoError(iter.Skip()) + should.ErrorIs(iter.Null(), io.ErrUnexpectedEOF) + } else { + should.Error(func() error { + if err := iter.Skip(); err != nil { + return err + } + if err := iter.Skip(); err != io.EOF { + return err + } + return nil + }()) + } + }) + } + }) } } diff --git a/dec_skip_test.go b/dec_skip_test.go index 947ea2b..6a55090 100644 --- a/dec_skip_test.go +++ b/dec_skip_test.go @@ -6,17 +6,22 @@ import ( "github.com/stretchr/testify/require" ) -func Test_skip_number_in_array(t *testing.T) { +func TestSkip_number_in_array(t *testing.T) { + var err error + a := require.New(t) d := DecodeStr(`[-0.12, "stream"]`) - d.Elem() - d.Skip() - d.Elem() + _, err = d.Elem() + a.NoError(err) + err = d.Skip() + a.NoError(err) + _, err = d.Elem() + a.NoError(err) if s, _ := d.Str(); s != "stream" { t.FailNow() } } -func Test_skip_string_in_array(t *testing.T) { +func TestSkip_string_in_array(t *testing.T) { d := DecodeStr(`["hello", "stream"]`) d.Elem() d.Skip() @@ -26,7 +31,7 @@ func Test_skip_string_in_array(t *testing.T) { } } -func Test_skip_null(t *testing.T) { +func TestSkip_null(t *testing.T) { d := DecodeStr(`[null , "stream"]`) d.Elem() d.Skip() @@ -36,7 +41,7 @@ func Test_skip_null(t *testing.T) { } } -func Test_skip_true(t *testing.T) { +func TestSkip_true(t *testing.T) { d := DecodeStr(`[true , "stream"]`) d.Elem() d.Skip() @@ -46,7 +51,7 @@ func Test_skip_true(t *testing.T) { } } -func Test_skip_false(t *testing.T) { +func TestSkip_false(t *testing.T) { d := DecodeStr(`[false , "stream"]`) d.Elem() d.Skip() @@ -56,7 +61,7 @@ func Test_skip_false(t *testing.T) { } } -func Test_skip_array(t *testing.T) { +func TestSkip_array(t *testing.T) { d := DecodeStr(`[[1, [2, [3], 4]], "stream"]`) d.Elem() d.Skip() @@ -66,7 +71,7 @@ func Test_skip_array(t *testing.T) { } } -func Test_skip_empty_array(t *testing.T) { +func TestSkip_empty_array(t *testing.T) { d := DecodeStr(`[ [ ], "stream"]`) d.Elem() d.Skip() @@ -76,7 +81,7 @@ func Test_skip_empty_array(t *testing.T) { } } -func Test_skip_nested(t *testing.T) { +func TestSkip_nested(t *testing.T) { d := DecodeStr(`[ {"a" : [{"stream": "c"}], "d": 102 }, "stream"]`) if _, err := d.Elem(); err != nil { t.Fatal(err) @@ -90,7 +95,7 @@ func Test_skip_nested(t *testing.T) { require.Equal(t, "stream", s) } -func Test_skip_simple_nested(t *testing.T) { +func TestSkip_simple_nested(t *testing.T) { d := DecodeStr(`["foo", "bar", "baz"]`) require.NoError(t, d.Skip()) } From b4b52bf514d27c1bd4afd7533c3d7b29c00ba807 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Wed, 12 Jan 2022 16:34:44 +0300 Subject: [PATCH 02/11] feat: rewrite skipNumber --- dec_skip.go | 219 ++++++++++++++++++++++++++++++++++------- dec_skip_cases_test.go | 1 - 2 files changed, 185 insertions(+), 35 deletions(-) diff --git a/dec_skip.go b/dec_skip.go index 8efbc80..2c1ab2c 100644 --- a/dec_skip.go +++ b/dec_skip.go @@ -1,6 +1,8 @@ package jx import ( + "io" + "github.com/go-faster/errors" ) @@ -50,11 +52,8 @@ func (d *Decoder) Skip() error { return d.skipThreeBytes('r', 'u', 'e') // true case 'f': return d.skipFourBytes('a', 'l', 's', 'e') // false - case '0': + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': d.unread() - _, err := d.Float32() - return err - case '-', '1', '2', '3', '4', '5', '6', '7', '8', '9': return d.skipNumber() case '[': if err := d.skipArr(); err != nil { @@ -97,51 +96,203 @@ func (d *Decoder) skipThreeBytes(b1, b2, b3 byte) error { return nil } -func (d *Decoder) skipNumber() error { - ok, err := d.skipNumberFast() - if err != nil || ok { - return err +var ( + closerSet = [256]byte{ + ',': 1, + ']': 1, + '}': 1, + ' ': 1, + '\t': 1, + '\n': 1, + '\r': 1, } - d.unread() - if _, err := d.Float64(); err != nil { - return err + digitSet = [256]byte{ + '0': 1, + '1': 1, + '2': 1, + '3': 1, + '4': 1, + '5': 1, + '6': 1, + '7': 1, + '8': 1, + '9': 1, } - return nil -} +) -func (d *Decoder) skipNumberFast() (ok bool, err error) { - dotFound := false - for i := d.head; i < d.tail; i++ { - c := d.buf[i] +func (d *Decoder) skipNumber() error { + if d.head == d.tail { + return io.ErrUnexpectedEOF + } + c := d.buf[d.head] + d.head++ + switch c { + case '-': + c, err := d.byte() + if err != nil { + return err + } + // Character after '-' must be a digit. + if digitSet[c] == 0 { + return badToken(c) + } + case '0': + // If buffer is empty, try to read more. + if d.head == d.tail { + err := d.read() + if err != nil { + // There is no data anymore. + if err == io.EOF { + return nil + } + return err + } + } + + c = d.buf[d.head] + if closerSet[c] != 0 { + return nil + } switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': case '.': - if dotFound { - return false, errors.New("more than one dot") + goto stateDot + case 'e', 'E': + goto stateExp + default: + return badToken(c) + } + } + for { + for i, c := range d.buf[d.head:d.tail] { + if closerSet[c] != 0 { + d.head += i + return nil } - if i+1 == d.tail { - return false, nil + if digitSet[c] != 0 { + continue } - c = d.buf[i+1] switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + case '.': + d.head += i + goto stateDot + case 'e', 'E': + d.head += i + goto stateExp default: - return false, errors.New("no digit after dot") + return badToken(c) + } + } + + if err := d.read(); err != nil { + // There is no data anymore. + if err == io.EOF { + return nil + } + return err + } + } + +stateDot: + d.head++ + for { + var last byte = '.' + for i, c := range d.buf[d.head:d.tail] { + if closerSet[c] != 0 { + d.head += i + if last == '.' { + return io.ErrUnexpectedEOF + } + return nil + } + last = c + if digitSet[c] != 0 { + continue } - dotFound = true - default: switch c { - case ',', ']', '}', ' ', '\t', '\n', '\r': - if d.head == i { - return false, nil // if - without following digits + case 'e', 'E': + if i == 0 { + return badToken(c) } - d.head = i - return true, nil + d.head += i + goto stateExp + default: + return badToken(c) } - return false, nil + } + + if err := d.read(); err != nil { + // There is no data anymore. + if err == io.EOF { + if last == '.' { + return io.ErrUnexpectedEOF + } + return nil + } + return err + } + } +stateExp: + d.head++ + for { + var last byte = 'e' + for i, c := range d.buf[d.head:d.tail] { + if closerSet[c] != 0 { + d.head += i + if last == 'e' { + return io.ErrUnexpectedEOF + } + return nil + } + last = c + if digitSet[c] == 0 { + if c == '-' || c == '+' { + d.head += i + goto stateExpAfterSign + } + return badToken(c) + } + } + + if err := d.read(); err != nil { + // There is no data anymore. + if err == io.EOF { + if last == 'e' { + return io.ErrUnexpectedEOF + } + return nil + } + return err + } + } +stateExpAfterSign: + d.head++ + for { + var last byte = '+' + for i, c := range d.buf[d.head:d.tail] { + if closerSet[c] != 0 { + d.head += i + if last == '+' { + return io.ErrUnexpectedEOF + } + return nil + } + last = c + if digitSet[c] == 0 { + return badToken(c) + } + } + + if err := d.read(); err != nil { + // There is no data anymore. + if err == io.EOF { + if last == '+' { + return io.ErrUnexpectedEOF + } + return nil + } + return err } } - return false, nil } func (d *Decoder) skipStr() error { diff --git a/dec_skip_cases_test.go b/dec_skip_cases_test.go index 71b391d..dfcdc82 100644 --- a/dec_skip_cases_test.go +++ b/dec_skip_cases_test.go @@ -13,7 +13,6 @@ import ( func Test_skip(t *testing.T) { type testCase struct { ptr interface{} - name string inputs []string } var testCases []testCase From 9a20f0ee6063689c164248091f5135ebf24a66af Mon Sep 17 00:00:00 2001 From: tdakkota Date: Wed, 12 Jan 2022 16:45:56 +0300 Subject: [PATCH 03/11] test: generate more skip cases --- dec_skip.go | 6 +++--- dec_skip_cases_test.go | 35 ++++++++++++++++++++++------------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/dec_skip.go b/dec_skip.go index 2c1ab2c..0194304 100644 --- a/dec_skip.go +++ b/dec_skip.go @@ -120,10 +120,10 @@ var ( } ) +// skipNumber reads one JSON number. +// +// Assumes d.buf is not empty. func (d *Decoder) skipNumber() error { - if d.head == d.tail { - return io.ErrUnexpectedEOF - } c := d.buf[d.head] d.head++ switch c { diff --git a/dec_skip_cases_test.go b/dec_skip_cases_test.go index dfcdc82..5b3d611 100644 --- a/dec_skip_cases_test.go +++ b/dec_skip_cases_test.go @@ -30,21 +30,11 @@ func Test_skip(t *testing.T) { `"\t"`, // valid }, }) - testCases = append(testCases, testCase{ - ptr: (*[]interface{})(nil), - inputs: []string{ - `[]`, // valid - `[1]`, // valid - `[ 1, "hello"]`, // valid - `[abc]`, // invalid - `[`, // invalid - `[[]`, // invalid - }, - }) - testCases = append(testCases, testCase{ + numberCase := testCase{ ptr: (*float64)(nil), inputs: []string{ "0", // valid + "-", // invalid "+1", // invalid "-a", // invalid "-\x00", // invalid, zero byte @@ -52,6 +42,9 @@ func Test_skip(t *testing.T) { "0e1", // valid "0e+1", // valid "0e-1", // valid + "0e-11", // valid + "0e-1a", // invalid + "0e-1+", // invalid "0e", // invalid "-e", // invalid "+e", // invalid @@ -72,7 +65,23 @@ func Test_skip(t *testing.T) { "10.", // invalid "-0.12", // valid }, - }) + } + testCases = append(testCases, numberCase) + arrayCase := testCase{ + ptr: (*[]interface{})(nil), + inputs: []string{ + `[]`, // valid + `[1]`, // valid + `[ 1, "hello"]`, // valid + `[abc]`, // invalid + `[`, // invalid + `[[]`, // invalid + }, + } + for _, c := range numberCase.inputs { + arrayCase.inputs = append(arrayCase.inputs, `[`+c+`]`) + } + testCases = append(testCases, arrayCase) testCases = append(testCases, testCase{ ptr: (*struct{})(nil), inputs: []string{ From 1da94379667a90b22d8d29f74efa12b48b23248a Mon Sep 17 00:00:00 2001 From: tdakkota Date: Wed, 12 Jan 2022 17:15:11 +0300 Subject: [PATCH 04/11] test: add floats benchmark --- bench_test.go | 56 ++++++++++-------------- testdata/floats.json | 102 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+), 32 deletions(-) create mode 100644 testdata/floats.json diff --git a/bench_test.go b/bench_test.go index 3719510..9c695b8 100644 --- a/bench_test.go +++ b/bench_test.go @@ -12,8 +12,12 @@ import ( "github.com/go-faster/errors" ) -//go:embed testdata/file.json -var benchData []byte +var ( + //go:embed testdata/file.json + benchData []byte + //go:embed testdata/floats.json + floatsData []byte +) func Benchmark_large_file(b *testing.B) { b.Run("JX", func(b *testing.B) { @@ -163,37 +167,25 @@ func Benchmark_large_file(b *testing.B) { } func BenchmarkValid(b *testing.B) { - b.Run("JX", func(b *testing.B) { - b.ReportAllocs() - b.SetBytes(int64(len(benchData))) - var d Decoder - for n := 0; n < b.N; n++ { - d.ResetBytes(benchData) - if err := d.Validate(); err != nil { - b.Fatal(err) - } - } - }) - b.Run("Std", func(b *testing.B) { - b.ReportAllocs() - b.SetBytes(int64(len(benchData))) - - for n := 0; n < b.N; n++ { - if !json.Valid(benchData) { - b.Fatal("invalid") + bch := []struct { + name string + input []byte + }{ + {"Big", benchData}, + {"Floats", floatsData}, + } + for _, bench := range bch { + b.Run(bench.name, func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(len(bench.input))) + var d Decoder + for n := 0; n < b.N; n++ { + d.ResetBytes(bench.input) + if err := d.Validate(); err != nil { + b.Fatal(err) + } } - } - }) -} - -func Benchmark_std_large_file(b *testing.B) { - b.ReportAllocs() - for n := 0; n < b.N; n++ { - var result []struct{} - err := json.Unmarshal(benchData, &result) - if err != nil { - b.Error(err) - } + }) } } diff --git a/testdata/floats.json b/testdata/floats.json new file mode 100644 index 0000000..981c45b --- /dev/null +++ b/testdata/floats.json @@ -0,0 +1,102 @@ +[ + 0.6046602879796196, + 0.9405090880450124, + 0.6645600532184904, + 0.4377141871869802, + 0.4246374970712657, + 0.6868230728671094, + 0.06563701921747622, + 0.15651925473279124, + 0.09696951891448456, + 0.30091186058528707, + 0.5152126285020654, + 0.8136399609900968, + 0.21426387258237492, + 0.380657189299686, + 0.31805817433032985, + 0.4688898449024232, + 0.28303415118044517, + 0.29310185733681576, + 0.6790846759202163, + 0.21855305259276428, + 0.20318687664732285, + 0.360871416856906, + 0.5706732760710226, + 0.8624914374478864, + 0.29311424455385804, + 0.29708256355629153, + 0.7525730355516119, + 0.2065826619136986, + 0.865335013001561, + 0.6967191657466347, + 0.5238203060500009, + 0.028303083325889995, + 0.15832827774512764, + 0.6072534395455154, + 0.9752416188605784, + 0.07945362337387198, + 0.5948085976830626, + 0.05912065131387529, + 0.692024587353112, + 0.30152268100656, + 0.17326623818270528, + 0.5410998550087353, + 0.544155573000885, + 0.27850762181610883, + 0.4231522015718281, + 0.5305857153507052, + 0.2535405005150605, + 0.28208099496492467, + 0.7886049150193449, + 0.3618054804803169, + 0.8805431227416171, + 0.2971122606397708, + 0.8943617293304537, + 0.09745461839911657, + 0.9769168685862624, + 0.07429099894984302, + 0.22228941700678773, + 0.6810783123925709, + 0.24151508854715265, + 0.31152244431052484, + 0.932846428518434, + 0.741848959991823, + 0.8010550426526613, + 0.7302314772948083, + 0.18292491645390843, + 0.4283570818068078, + 0.8969919575618727, + 0.6826534880132438, + 0.9789293555766876, + 0.9222122589217269, + 0.09083727535388708, + 0.4931419977048804, + 0.9269868035744142, + 0.9549454404167818, + 0.3479539636282229, + 0.6908388315056789, + 0.7109071952999951, + 0.5637795958152644, + 0.6494894605929404, + 0.5517650490127749, + 0.7558235074915978, + 0.40380328579570035, + 0.13065111702897217, + 0.9859647293402467, + 0.8963417453962161, + 0.3220839705208817, + 0.7211477651926741, + 0.6445397825093294, + 0.08552050754191123, + 0.6695752976997745, + 0.6227283173637045, + 0.3696928436398219, + 0.2368225468054852, + 0.5352818906344061, + 0.18724610140105305, + 0.2388407028053186, + 0.6280981712183633, + 0.1267529293726013, + 0.28133029380535923, + 0.41032284435628247 +] From 510eda1067aa9f933935bb0ed49b187379a5e3a8 Mon Sep 17 00:00:00 2001 From: Aleksandr Razumov Date: Wed, 12 Jan 2022 19:35:32 +0300 Subject: [PATCH 05/11] test(fuzz): update Valid fuzzer --- fuzz_test.go | 9 ++++++++- ...295c9f1dd240bf6419d172a24fc4085b4b954dae35ffa6447d64a | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 testdata/fuzz/FuzzValid/607400f9a48295c9f1dd240bf6419d172a24fc4085b4b954dae35ffa6447d64a diff --git a/fuzz_test.go b/fuzz_test.go index ee0c618..9c9859a 100644 --- a/fuzz_test.go +++ b/fuzz_test.go @@ -5,6 +5,7 @@ package jx import ( "bytes" + "encoding/json" "testing" "github.com/go-faster/errors" @@ -22,7 +23,13 @@ func FuzzValid(f *testing.F) { f.Add([]byte(s)) } f.Fuzz(func(t *testing.T, data []byte) { - Valid(data) + var ( + std = json.Valid(data) + jx = Valid(data) + ) + if std != jx { + t.Fatalf(`Valid(%#v): %v (std) != %v (jx)`, string(data), std, jx) + } }) } diff --git a/testdata/fuzz/FuzzValid/607400f9a48295c9f1dd240bf6419d172a24fc4085b4b954dae35ffa6447d64a b/testdata/fuzz/FuzzValid/607400f9a48295c9f1dd240bf6419d172a24fc4085b4b954dae35ffa6447d64a new file mode 100644 index 0000000..b52344a --- /dev/null +++ b/testdata/fuzz/FuzzValid/607400f9a48295c9f1dd240bf6419d172a24fc4085b4b954dae35ffa6447d64a @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("0E0+0") From 176748e2ecb87c86be09f9d7bcc5f768f32cb801 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Thu, 13 Jan 2022 05:49:38 +0300 Subject: [PATCH 06/11] fix: 0e0.0 case in skipNumber --- dec_skip.go | 52 ++++++++++--------------------- dec_skip_cases_test.go | 71 ++++++++++++++++++++++++------------------ 2 files changed, 56 insertions(+), 67 deletions(-) diff --git a/dec_skip.go b/dec_skip.go index 0194304..9a70c76 100644 --- a/dec_skip.go +++ b/dec_skip.go @@ -233,50 +233,33 @@ stateDot: } stateExp: d.head++ - for { - var last byte = 'e' - for i, c := range d.buf[d.head:d.tail] { - if closerSet[c] != 0 { - d.head += i - if last == 'e' { - return io.ErrUnexpectedEOF - } - return nil - } - last = c - if digitSet[c] == 0 { - if c == '-' || c == '+' { - d.head += i - goto stateExpAfterSign - } - return badToken(c) - } + // There must be a number or sign after e. + { + v, err := d.byte() + if err != nil { + return err } - - if err := d.read(); err != nil { - // There is no data anymore. - if err == io.EOF { - if last == 'e' { - return io.ErrUnexpectedEOF + if digitSet[v] == 0 { + // There must be a number after e. + if v == '-' || v == '+' { + v, err := d.byte() + if err != nil { + return err } - return nil + if digitSet[v] == 0 { + return badToken(v) + } + } else { + return badToken(v) } - return err } } -stateExpAfterSign: - d.head++ for { - var last byte = '+' for i, c := range d.buf[d.head:d.tail] { if closerSet[c] != 0 { d.head += i - if last == '+' { - return io.ErrUnexpectedEOF - } return nil } - last = c if digitSet[c] == 0 { return badToken(c) } @@ -285,9 +268,6 @@ stateExpAfterSign: if err := d.read(); err != nil { // There is no data anymore. if err == io.EOF { - if last == '+' { - return io.ErrUnexpectedEOF - } return nil } return err diff --git a/dec_skip_cases_test.go b/dec_skip_cases_test.go index 5b3d611..f09d411 100644 --- a/dec_skip_cases_test.go +++ b/dec_skip_cases_test.go @@ -33,37 +33,46 @@ func Test_skip(t *testing.T) { numberCase := testCase{ ptr: (*float64)(nil), inputs: []string{ - "0", // valid - "-", // invalid - "+1", // invalid - "-a", // invalid - "-\x00", // invalid, zero byte - "0.1", // valid - "0e1", // valid - "0e+1", // valid - "0e-1", // valid - "0e-11", // valid - "0e-1a", // invalid - "0e-1+", // invalid - "0e", // invalid - "-e", // invalid - "+e", // invalid - ".e", // invalid - "0.e", // invalid - "0.e", // invalid - "0.0e", // invalid - "0.0e1", // valid - "0.0e+1", // valid - "0.0e+", // invalid - "0.0e-", // invalid - "0..1", // invalid, more dot - "1e+1", // valid - "1+1", // invalid - "1E1", // valid, e or E - "1ee1", // invalid - "100a", // invalid - "10.", // invalid - "-0.12", // valid + "0", // valid + "-", // invalid + "+", // invalid + "+1", // invalid + "-a", // invalid + "-\x00", // invalid, zero byte + "0.1", // valid + "0e1", // valid + "0e+1", // valid + "0e-1", // valid + "0e-11", // valid + "0e-1a", // invalid + "0e-1+", // invalid + "0e", // invalid + "e", // invalid + "-e", // invalid + "+e", // invalid + ".e", // invalid + "e.", // invalid + "0.e", // invalid + "0-e", // invalid + "0e-", // invalid + "0e+", // invalid + "0.0e", // invalid + "0.0e1", // valid + "0.0e+", // invalid + "0.0e-", // invalid + "0e0+0", // invalid + "0.e0+0", // invalid + "0.0e+0", // valid + "0.0e+1", // valid + "0.0e0+0", // invalid + "0..1", // invalid, more dot + "1e+1", // valid + "1+1", // invalid + "1E1", // valid, e or E + "1ee1", // invalid + "100a", // invalid + "10.", // invalid + "-0.12", // valid }, } testCases = append(testCases, numberCase) From 5650462298aadbb222830c99cd20b4af50564074 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Thu, 13 Jan 2022 05:56:10 +0300 Subject: [PATCH 07/11] fix: some cases for stream reading --- dec_skip.go | 3 +++ dec_skip_cases_test.go | 40 ++++++++++++++++++++++++---------------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/dec_skip.go b/dec_skip.go index 9a70c76..742f1b3 100644 --- a/dec_skip.go +++ b/dec_skip.go @@ -186,6 +186,7 @@ func (d *Decoder) skipNumber() error { if err := d.read(); err != nil { // There is no data anymore. if err == io.EOF { + d.head = d.tail return nil } return err @@ -223,6 +224,7 @@ stateDot: if err := d.read(); err != nil { // There is no data anymore. if err == io.EOF { + d.head = d.tail if last == '.' { return io.ErrUnexpectedEOF } @@ -268,6 +270,7 @@ stateExp: if err := d.read(); err != nil { // There is no data anymore. if err == io.EOF { + d.head = d.tail return nil } return err diff --git a/dec_skip_cases_test.go b/dec_skip_cases_test.go index f09d411..9783f6c 100644 --- a/dec_skip_cases_test.go +++ b/dec_skip_cases_test.go @@ -5,6 +5,7 @@ import ( "fmt" "io" "reflect" + "strings" "testing" "github.com/stretchr/testify/require" @@ -106,6 +107,27 @@ func Test_skip(t *testing.T) { `{abc}`, // invalid }, }) + + testDecode := func(iter *Decoder, stdErr error) func(t *testing.T) { + return func(t *testing.T) { + should := require.New(t) + + if stdErr == nil { + should.NoError(iter.Skip()) + should.ErrorIs(iter.Null(), io.ErrUnexpectedEOF) + } else { + should.Error(func() error { + if err := iter.Skip(); err != nil { + return err + } + if err := iter.Skip(); err != io.EOF { + return err + } + return nil + }()) + } + } + } for _, testCase := range testCases { valType := reflect.TypeOf(testCase.ptr).Elem() t.Run(valType.Kind().String(), func(t *testing.T) { @@ -116,24 +138,10 @@ func Test_skip(t *testing.T) { t.Logf("Input: %q", input) } }) - should := require.New(t) ptrVal := reflect.New(valType) stdErr := json.Unmarshal([]byte(input), ptrVal.Interface()) - iter := DecodeStr(input) - if stdErr == nil { - should.NoError(iter.Skip()) - should.ErrorIs(iter.Null(), io.ErrUnexpectedEOF) - } else { - should.Error(func() error { - if err := iter.Skip(); err != nil { - return err - } - if err := iter.Skip(); err != io.EOF { - return err - } - return nil - }()) - } + t.Run("Buffer", testDecode(DecodeStr(input), stdErr)) + t.Run("Reader", testDecode(Decode(strings.NewReader(input), 512), stdErr)) }) } }) From fe101be6c044ee1b6aa75dff1f926a80e3e6e602 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Thu, 13 Jan 2022 07:23:02 +0300 Subject: [PATCH 08/11] test: add OneByteReader test --- dec_skip_cases_test.go | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/dec_skip_cases_test.go b/dec_skip_cases_test.go index 9783f6c..9ecc738 100644 --- a/dec_skip_cases_test.go +++ b/dec_skip_cases_test.go @@ -7,11 +7,12 @@ import ( "reflect" "strings" "testing" + "testing/iotest" "github.com/stretchr/testify/require" ) -func Test_skip(t *testing.T) { +func TestSkip(t *testing.T) { type testCase struct { ptr interface{} inputs []string @@ -37,6 +38,7 @@ func Test_skip(t *testing.T) { "0", // valid "-", // invalid "+", // invalid + "-1", // valid "+1", // invalid "-a", // invalid "-\x00", // invalid, zero byte @@ -46,6 +48,7 @@ func Test_skip(t *testing.T) { "0e-1", // valid "0e-11", // valid "0e-1a", // invalid + "1.e1", // invalid "0e-1+", // invalid "0e", // invalid "e", // invalid @@ -108,10 +111,15 @@ func Test_skip(t *testing.T) { }, }) - testDecode := func(iter *Decoder, stdErr error) func(t *testing.T) { + testDecode := func(iter *Decoder, input string, stdErr error) func(t *testing.T) { return func(t *testing.T) { - should := require.New(t) + t.Cleanup(func() { + if t.Failed() { + t.Logf("Input: %q", input) + } + }) + should := require.New(t) if stdErr == nil { should.NoError(iter.Skip()) should.ErrorIs(iter.Null(), io.ErrUnexpectedEOF) @@ -133,15 +141,21 @@ func Test_skip(t *testing.T) { t.Run(valType.Kind().String(), func(t *testing.T) { for inputIdx, input := range testCase.inputs { t.Run(fmt.Sprintf("Test%d", inputIdx), func(t *testing.T) { - t.Cleanup(func() { - if t.Failed() { - t.Logf("Input: %q", input) - } - }) ptrVal := reflect.New(valType) stdErr := json.Unmarshal([]byte(input), ptrVal.Interface()) - t.Run("Buffer", testDecode(DecodeStr(input), stdErr)) - t.Run("Reader", testDecode(Decode(strings.NewReader(input), 512), stdErr)) + + t.Run("Buffer", testDecode(DecodeStr(input), input, stdErr)) + + r := strings.NewReader(input) + d := Decode(r, 512) + t.Run("Reader", testDecode(d, input, stdErr)) + + // FIMXE(tdakkota): fix string skipping + if valType.Kind() != reflect.String { + r.Reset(input) + obr := iotest.OneByteReader(r) + t.Run("OneByteReader", testDecode(Decode(obr, 512), input, stdErr)) + } }) } }) From efb411fd59fd04fb78579329b7afaedb5b1b3c66 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Thu, 13 Jan 2022 07:23:52 +0300 Subject: [PATCH 09/11] fix: invalid -00 number skipping --- dec_skip.go | 78 +++++++++++++++++++++++------------------- dec_skip_cases_test.go | 3 ++ 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/dec_skip.go b/dec_skip.go index 742f1b3..3e2c379 100644 --- a/dec_skip.go +++ b/dec_skip.go @@ -136,6 +136,10 @@ func (d *Decoder) skipNumber() error { if digitSet[c] == 0 { return badToken(c) } + if c != '0' { + break + } + fallthrough case '0': // If buffer is empty, try to read more. if d.head == d.tail { @@ -195,64 +199,68 @@ func (d *Decoder) skipNumber() error { stateDot: d.head++ - for { + { var last byte = '.' - for i, c := range d.buf[d.head:d.tail] { - if closerSet[c] != 0 { - d.head += i - if last == '.' { - return io.ErrUnexpectedEOF + for { + for i, c := range d.buf[d.head:d.tail] { + if closerSet[c] != 0 { + d.head += i + // Check that dot is not last character. + if last == '.' { + return io.ErrUnexpectedEOF + } + return nil } - return nil - } - last = c - if digitSet[c] != 0 { - continue - } - switch c { - case 'e', 'E': - if i == 0 { + if digitSet[c] != 0 { + last = c + continue + } + switch c { + case 'e', 'E': + if last == '.' { + return badToken(c) + } + d.head += i + goto stateExp + default: return badToken(c) } - d.head += i - goto stateExp - default: - return badToken(c) } - } - if err := d.read(); err != nil { - // There is no data anymore. - if err == io.EOF { - d.head = d.tail - if last == '.' { - return io.ErrUnexpectedEOF + if err := d.read(); err != nil { + // There is no data anymore. + if err == io.EOF { + d.head = d.tail + // Check that dot is not last character. + if last == '.' { + return io.ErrUnexpectedEOF + } + return nil } - return nil + return err } - return err } } stateExp: d.head++ // There must be a number or sign after e. { - v, err := d.byte() + numOrSign, err := d.byte() if err != nil { return err } - if digitSet[v] == 0 { + if digitSet[numOrSign] == 0 { // There must be a number after e. - if v == '-' || v == '+' { - v, err := d.byte() + if numOrSign == '-' || numOrSign == '+' { + num, err := d.byte() if err != nil { return err } - if digitSet[v] == 0 { - return badToken(v) + if digitSet[num] == 0 { + return badToken(num) } } else { - return badToken(v) + return badToken(numOrSign) } } } diff --git a/dec_skip_cases_test.go b/dec_skip_cases_test.go index 9ecc738..57b5963 100644 --- a/dec_skip_cases_test.go +++ b/dec_skip_cases_test.go @@ -41,6 +41,9 @@ func TestSkip(t *testing.T) { "-1", // valid "+1", // invalid "-a", // invalid + "-0", // valid + "-00", // invalid + "-01", // invalid "-\x00", // invalid, zero byte "0.1", // valid "0e1", // valid From 85c2c59792165b640abd5641ee11a66e29052882 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Thu, 13 Jan 2022 09:37:59 +0300 Subject: [PATCH 10/11] fix: merge skipNumber sets to reduce cache load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit name old time/op new time/op delta Valid/Big-12 3.98µs ± 5% 3.60µs ± 2% -9.63% (p=0.000 n=25+24) Valid/Floats-12 3.23µs ± 4% 2.91µs ± 1% -9.92% (p=0.000 n=25+24) name old speed new speed delta Valid/Big-12 657MB/s ± 5% 727MB/s ± 2% +10.61% (p=0.000 n=25+24) Valid/Floats-12 720MB/s ± 3% 799MB/s ± 1% +10.98% (p=0.000 n=25+24) --- dec_skip.go | 51 +++++++++++++++++++++++------------------- dec_skip_bench_test.go | 34 +--------------------------- dec_skip_cases_test.go | 3 +++ 3 files changed, 32 insertions(+), 56 deletions(-) diff --git a/dec_skip.go b/dec_skip.go index 3e2c379..03d9556 100644 --- a/dec_skip.go +++ b/dec_skip.go @@ -97,16 +97,7 @@ func (d *Decoder) skipThreeBytes(b1, b2, b3 byte) error { } var ( - closerSet = [256]byte{ - ',': 1, - ']': 1, - '}': 1, - ' ': 1, - '\t': 1, - '\n': 1, - '\r': 1, - } - digitSet = [256]byte{ + skipNumberSet = [256]byte{ '0': 1, '1': 1, '2': 1, @@ -117,6 +108,14 @@ var ( '7': 1, '8': 1, '9': 1, + + ',': 2, + ']': 2, + '}': 2, + ' ': 2, + '\t': 2, + '\n': 2, + '\r': 2, } ) @@ -124,6 +123,10 @@ var ( // // Assumes d.buf is not empty. func (d *Decoder) skipNumber() error { + const ( + digitTag byte = 1 + closerTag byte = 2 + ) c := d.buf[d.head] d.head++ switch c { @@ -133,7 +136,7 @@ func (d *Decoder) skipNumber() error { return err } // Character after '-' must be a digit. - if digitSet[c] == 0 { + if skipNumberSet[c] != digitTag { return badToken(c) } if c != '0' { @@ -154,7 +157,7 @@ func (d *Decoder) skipNumber() error { } c = d.buf[d.head] - if closerSet[c] != 0 { + if skipNumberSet[c] == closerTag { return nil } switch c { @@ -168,13 +171,14 @@ func (d *Decoder) skipNumber() error { } for { for i, c := range d.buf[d.head:d.tail] { - if closerSet[c] != 0 { + switch skipNumberSet[c] { + case closerTag: d.head += i return nil - } - if digitSet[c] != 0 { + case digitTag: continue } + switch c { case '.': d.head += i @@ -203,18 +207,19 @@ stateDot: var last byte = '.' for { for i, c := range d.buf[d.head:d.tail] { - if closerSet[c] != 0 { + switch skipNumberSet[c] { + case closerTag: d.head += i // Check that dot is not last character. if last == '.' { return io.ErrUnexpectedEOF } return nil - } - if digitSet[c] != 0 { + case digitTag: last = c continue } + switch c { case 'e', 'E': if last == '.' { @@ -249,14 +254,14 @@ stateExp: if err != nil { return err } - if digitSet[numOrSign] == 0 { - // There must be a number after e. + if skipNumberSet[numOrSign] != digitTag { // If next character is not a digit, check for sign. if numOrSign == '-' || numOrSign == '+' { num, err := d.byte() if err != nil { return err } - if digitSet[num] == 0 { + // There must be a number after sign. + if skipNumberSet[num] != digitTag { return badToken(num) } } else { @@ -266,11 +271,11 @@ stateExp: } for { for i, c := range d.buf[d.head:d.tail] { - if closerSet[c] != 0 { + if skipNumberSet[c] == closerTag { d.head += i return nil } - if digitSet[c] == 0 { + if skipNumberSet[c] == 0 { return badToken(c) } } diff --git a/dec_skip_bench_test.go b/dec_skip_bench_test.go index 5a106c3..c5f4085 100644 --- a/dec_skip_bench_test.go +++ b/dec_skip_bench_test.go @@ -1,7 +1,6 @@ package jx import ( - "encoding/json" "testing" ) @@ -9,7 +8,7 @@ type TestResp struct { Code uint64 } -func Benchmark_skip(b *testing.B) { +func BenchmarkSkip(b *testing.B) { input := []byte(` { "_shards":{ @@ -51,34 +50,3 @@ func Benchmark_skip(b *testing.B) { } } } - -func Benchmark_std_skip(b *testing.B) { - input := []byte(` -{ - "_shards":{ - "total" : 5, - "successful" : 5, - "failed" : 0 - }, - "hits":{ - "total" : 1, - "hits" : [ - { - "_index" : "twitter", - "_type" : "tweet", - "_id" : "1", - "_source" : { - "user" : "kimchy", - "postDate" : "2009-11-15T14:12:12", - "message" : "trying out Elasticsearch" - } - } - ] - }, - "code": 200 -}`) - for n := 0; n < b.N; n++ { - result := TestResp{} - _ = json.Unmarshal(input, &result) - } -} diff --git a/dec_skip_cases_test.go b/dec_skip_cases_test.go index 57b5963..bec096e 100644 --- a/dec_skip_cases_test.go +++ b/dec_skip_cases_test.go @@ -80,6 +80,9 @@ func TestSkip(t *testing.T) { "100a", // invalid "10.", // invalid "-0.12", // valid + "0]", // invalid + "0e]", // invalid + "0e+]", // invalid }, } testCases = append(testCases, numberCase) From 29e97c528f57463aa6cc2b3c1a63a6d19df87aa4 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Thu, 13 Jan 2022 10:25:43 +0300 Subject: [PATCH 11/11] fix: check for unescaped char in strSlow --- dec_skip_cases_test.go | 9 +++------ dec_str.go | 3 +++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dec_skip_cases_test.go b/dec_skip_cases_test.go index bec096e..bff381c 100644 --- a/dec_skip_cases_test.go +++ b/dec_skip_cases_test.go @@ -156,12 +156,9 @@ func TestSkip(t *testing.T) { d := Decode(r, 512) t.Run("Reader", testDecode(d, input, stdErr)) - // FIMXE(tdakkota): fix string skipping - if valType.Kind() != reflect.String { - r.Reset(input) - obr := iotest.OneByteReader(r) - t.Run("OneByteReader", testDecode(Decode(obr, 512), input, stdErr)) - } + r.Reset(input) + obr := iotest.OneByteReader(r) + t.Run("OneByteReader", testDecode(Decode(obr, 512), input, stdErr)) }) } }) diff --git a/dec_str.go b/dec_str.go index 8d911a7..3660991 100644 --- a/dec_str.go +++ b/dec_str.go @@ -130,6 +130,9 @@ func (d *Decoder) strSlow(v value) (value, error) { return v, errors.Wrap(err, "escape") } default: + if c < ' ' { + return value{}, badToken(c) + } v = v.byte(c) } }