From 1e1234bf2e512a68809dc92780ba37d32a4b2253 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Tue, 17 Jan 2023 01:08:10 +0300 Subject: [PATCH 01/10] feat: pass offset to `unexpected byte` error Simplify error reporting. --- dec.go | 7 ++++++- dec_arr.go | 19 ++++++++++--------- dec_arr_iter.go | 7 ++++--- dec_bool.go | 13 ++++++++----- dec_capture.go | 6 +++++- dec_error.go | 17 +++++++++++++++++ dec_float.go | 6 +++--- dec_int.go | 30 ++++++++++++++++-------------- dec_null.go | 7 +++++-- dec_num.go | 3 ++- dec_obj.go | 25 +++++++++++++------------ dec_obj_iter.go | 14 ++++++++------ dec_read.go | 10 ++++++---- dec_skip.go | 42 +++++++++++++++++++++--------------------- dec_str.go | 34 ++++++++++++---------------------- num.go | 4 ++-- 16 files changed, 138 insertions(+), 106 deletions(-) create mode 100644 dec_error.go diff --git a/dec.go b/dec.go index 7786752..8195713 100644 --- a/dec.go +++ b/dec.go @@ -85,7 +85,8 @@ type Decoder struct { head int // offset in buf to start of current json stream tail int // offset in buf to end of current json stream - depth int + streamOffset int // for reader, offset in stream to start of current buf contents + depth int } const defaultBuf = 512 @@ -114,6 +115,10 @@ func DecodeStr(input string) *Decoder { return DecodeBytes([]byte(input)) } +func (d *Decoder) offset() int { + return d.streamOffset + d.head +} + // Reset resets reader and underlying state, next reads will use provided io.Reader. 
func (d *Decoder) Reset(reader io.Reader) { d.reader = reader diff --git a/dec_arr.go b/dec_arr.go index 6040e79..baff8e0 100644 --- a/dec_arr.go +++ b/dec_arr.go @@ -17,7 +17,7 @@ func (d *Decoder) Elem() (ok bool, err error) { case '[': c, err := d.more() if err != nil { - return false, errors.Wrap(err, "next") + return false, err } if c != ']' { d.unread() @@ -29,24 +29,24 @@ func (d *Decoder) Elem() (ok bool, err error) { case ',': return true, nil default: - return false, errors.Wrap(badToken(c), `"[" or "," or "]" expected`) + return false, errors.Wrap(badToken(c, d.offset()), `"[", "," or "]" expected`) } } // Arr decodes array and invokes callback on each array element. func (d *Decoder) Arr(f func(d *Decoder) error) error { if err := d.consume('['); err != nil { - return errors.Wrap(err, "start") + return errors.Wrap(err, `"[" expected`) } if f == nil { return d.skipArr() } if err := d.incDepth(); err != nil { - return errors.Wrap(err, "inc") + return err } c, err := d.more() if err != nil { - return err + return errors.Wrap(err, `value or "]" expected`) } if c == ']' { return d.decDepth() @@ -58,23 +58,24 @@ func (d *Decoder) Arr(f func(d *Decoder) error) error { c, err = d.more() if err != nil { - return errors.Wrap(err, "next") + return errors.Wrap(err, `"," or "]" expected`) } for c == ',' { // Skip whitespace before reading element. if _, err := d.next(); err != nil { - return errors.Wrap(err, "next") + return err } d.unread() if err := f(d); err != nil { return errors.Wrap(err, "callback") } if c, err = d.next(); err != nil { - return errors.Wrap(err, "next") + return err } } if c != ']' { - return errors.Wrap(badToken(c), "end") + err := badToken(c, d.offset()-1) + return errors.Wrap(err, `"]" expected`) } return d.decDepth() } diff --git a/dec_arr_iter.go b/dec_arr_iter.go index 16c4946..15abd16 100644 --- a/dec_arr_iter.go +++ b/dec_arr_iter.go @@ -15,10 +15,10 @@ type ArrIter struct { // ArrIter creates new array iterator. 
func (d *Decoder) ArrIter() (ArrIter, error) { if err := d.consume('['); err != nil { - return ArrIter{}, errors.Wrap(err, "start") + return ArrIter{}, errors.Wrap(err, `"[" expected`) } if err := d.incDepth(); err != nil { - return ArrIter{}, errors.Wrap(err, "inc") + return ArrIter{}, err } if _, err := d.more(); err != nil { return ArrIter{}, err @@ -46,7 +46,8 @@ func (i *ArrIter) Next() bool { } if i.comma { if c != ',' { - i.err = badToken(c) + err := badToken(c, dec.offset()-1) + i.err = errors.Wrap(err, `"," expected`) return false } } else { diff --git a/dec_bool.go b/dec_bool.go index 25fdcd0..30a93b9 100644 --- a/dec_bool.go +++ b/dec_bool.go @@ -6,7 +6,10 @@ func (d *Decoder) Bool() (bool, error) { return false, err } - var buf [4]byte + var ( + offset = d.offset() + buf [4]byte + ) if err := d.readExact4(&buf); err != nil { return false, err } @@ -20,19 +23,19 @@ func (d *Decoder) Bool() (bool, error) { return false, err } if c != 'e' { - return false, badToken(c) + return false, badToken(c, offset+4) } return false, nil default: switch c := buf[0]; c { case 't': const encodedTrue = 't' | 'r'<<8 | 'u'<<16 | 'e'<<24 - return false, findInvalidToken4(buf, encodedTrue) + return false, findInvalidToken4(buf, encodedTrue, offset) case 'f': const encodedFals = 'f' | 'a'<<8 | 'l'<<16 | 's'<<24 - return false, findInvalidToken4(buf, encodedFals) + return false, findInvalidToken4(buf, encodedFals, offset) default: - return false, badToken(c) + return false, badToken(c, offset) } } } diff --git a/dec_capture.go b/dec_capture.go index 5a9e643..1d1e417 100644 --- a/dec_capture.go +++ b/dec_capture.go @@ -13,10 +13,14 @@ func (d *Decoder) Capture(f func(d *Decoder) error) error { if d.reader != nil { // TODO(tdakkota): May it be more efficient? 
- var buf bytes.Buffer + var ( + buf bytes.Buffer + streamOffset = d.streamOffset + ) reader := io.TeeReader(d.reader, &buf) defer func() { d.reader = io.MultiReader(&buf, d.reader) + d.streamOffset = streamOffset }() d.reader = reader } diff --git a/dec_error.go b/dec_error.go new file mode 100644 index 0000000..2f957f2 --- /dev/null +++ b/dec_error.go @@ -0,0 +1,17 @@ +package jx + +import "fmt" + +// badTokenErr means that Token was unexpected while decoding. +type badTokenErr struct { + Token byte + Offset int +} + +func (e *badTokenErr) Error() string { + return fmt.Sprintf("unexpected byte %d %q at %d", e.Token, e.Token, e.Offset) +} + +func badToken(c byte, offset int) error { + return &badTokenErr{Token: c, Offset: offset} +} diff --git a/dec_float.go b/dec_float.go index d6a7915..68c7061 100644 --- a/dec_float.go +++ b/dec_float.go @@ -71,7 +71,7 @@ func (d *Decoder) BigInt() (*big.Int, error) { func (d *Decoder) Float32() (float32, error) { c, err := d.more() if err != nil { - return 0, errors.Wrap(err, "byte") + return 0, err } if c != '-' { d.unread() @@ -227,7 +227,7 @@ func (d *Decoder) float32Slow() (float32, error) { func (d *Decoder) Float64() (float64, error) { c, err := d.more() if err != nil { - return 0, errors.Wrap(err, "byte") + return 0, err } if floatDigits[c] >= 0 { d.unread() @@ -241,7 +241,7 @@ func (d *Decoder) Float64() (float64, error) { } return -v, err default: - return 0, badToken(c) + return 0, badToken(c, d.offset()) } } diff --git a/dec_int.go b/dec_int.go index d64d3c1..dc574cf 100644 --- a/dec_int.go +++ b/dec_int.go @@ -77,7 +77,7 @@ func (d *Decoder) UInt() (uint, error) { func (d *Decoder) Int8() (int8, error) { c, err := d.byte() if err != nil { - return 0, errors.Wrap(err, "byte") + return 0, err } if c == '-' { val, err := d.readUInt8() @@ -108,7 +108,7 @@ func (d *Decoder) UInt8() (uint8, error) { func (d *Decoder) readUInt8() (uint8, error) { c, err := d.byte() if err != nil { - return 0, errors.Wrap(err, "byte") + 
return 0, err } ind := intDigits[c] if ind == 0 { @@ -171,7 +171,7 @@ func (d *Decoder) readUInt8() (uint8, error) { func (d *Decoder) Int16() (int16, error) { c, err := d.byte() if err != nil { - return 0, errors.Wrap(err, "byte") + return 0, err } if c == '-' { val, err := d.readUInt16() @@ -202,7 +202,7 @@ func (d *Decoder) UInt16() (uint16, error) { func (d *Decoder) readUInt16() (uint16, error) { c, err := d.byte() if err != nil { - return 0, errors.Wrap(err, "byte") + return 0, err } ind := intDigits[c] if ind == 0 { @@ -277,7 +277,7 @@ func (d *Decoder) readUInt16() (uint16, error) { func (d *Decoder) Int32() (int32, error) { c, err := d.byte() if err != nil { - return 0, errors.Wrap(err, "byte") + return 0, err } if c == '-' { val, err := d.readUInt32() @@ -308,7 +308,7 @@ func (d *Decoder) UInt32() (uint32, error) { func (d *Decoder) readUInt32() (uint32, error) { c, err := d.byte() if err != nil { - return 0, errors.Wrap(err, "byte") + return 0, err } ind := intDigits[c] if ind == 0 { @@ -401,7 +401,7 @@ func (d *Decoder) readUInt32() (uint32, error) { func (d *Decoder) Int64() (int64, error) { c, err := d.byte() if err != nil { - return 0, errors.Wrap(err, "byte") + return 0, err } if c == '-' { c, err := d.next() @@ -431,7 +431,7 @@ func (d *Decoder) Int64() (int64, error) { func (d *Decoder) UInt64() (uint64, error) { c, err := d.byte() if err != nil { - return 0, errors.Wrap(err, "byte") + return 0, err } return d.readUInt64(c) } @@ -442,7 +442,8 @@ func (d *Decoder) readUInt64(c byte) (uint64, error) { return 0, nil // single zero } if ind == invalidCharForNumber { - return 0, errors.Wrap(badToken(c), "invalid number") + err := badToken(c, d.offset()-1) + return 0, errors.Wrap(err, "invalid number") } value := uint64(ind) if d.tail-d.head > 10 { @@ -514,12 +515,13 @@ func (d *Decoder) readUInt64(c byte) (uint64, error) { } value = (value << 3) + (value << 1) + uint64(ind) } - err := d.read() - if err == io.EOF { + switch err := d.read(); err { + case 
io.EOF: return value, nil - } - if err != nil { - return 0, errors.Wrap(err, "read") + case nil: + continue + default: + return 0, err } } } diff --git a/dec_null.go b/dec_null.go index d109226..409b56b 100644 --- a/dec_null.go +++ b/dec_null.go @@ -7,14 +7,17 @@ func (d *Decoder) Null() error { return err } - var buf [4]byte + var ( + offset = d.offset() + buf [4]byte + ) if err := d.readExact4(&buf); err != nil { return err } if string(buf[:]) != "null" { const encodedNull = 'n' | 'u'<<8 | 'l'<<16 | 'l'<<24 - return findInvalidToken4(buf, encodedNull) + return findInvalidToken4(buf, encodedNull, offset) } return nil } diff --git a/dec_num.go b/dec_num.go index f3c695b..2b1b7bd 100644 --- a/dec_num.go +++ b/dec_num.go @@ -20,6 +20,7 @@ func (d *Decoder) NumAppend(v Num) (Num, error) { func (d *Decoder) num(v Num, forceAppend bool) (Num, error) { switch d.Next() { case String: + offset := d.offset() start := d.head str, err := d.str(value{raw: true}) @@ -44,7 +45,7 @@ func (d *Decoder) num(v Num, forceAppend bool) (Num, error) { return Num{}, errors.Wrap(err, "skip number") } default: - return nil, badToken(c) + return nil, badToken(c, offset) } } diff --git a/dec_obj.go b/dec_obj.go index d0c135b..f8281af 100644 --- a/dec_obj.go +++ b/dec_obj.go @@ -9,17 +9,17 @@ import ( // The key value is valid only until f is not returned. 
func (d *Decoder) ObjBytes(f func(d *Decoder, key []byte) error) error { if err := d.consume('{'); err != nil { - return errors.Wrap(err, "start") + return errors.Wrap(err, `"{" expected`) } if f == nil { return d.skipObj() } if err := d.incDepth(); err != nil { - return errors.Wrap(err, "inc") + return err } c, err := d.more() if err != nil { - return errors.Wrap(err, "next") + return errors.Wrap(err, `'"' or "}" expected`) } if c == '}' { return d.decDepth() @@ -34,14 +34,14 @@ func (d *Decoder) ObjBytes(f func(d *Decoder, key []byte) error) error { k, err := d.str(value{raw: isBuffer}) if err != nil { - return errors.Wrap(err, "str") + return errors.Wrap(err, "field name") } if err := d.consume(':'); err != nil { - return errors.Wrap(err, "field") + return errors.Wrap(err, `":" expected`) } // Skip whitespace. if _, err = d.more(); err != nil { - return errors.Wrap(err, "more") + return err } d.unread() if err := f(d, k.buf); err != nil { @@ -50,30 +50,31 @@ func (d *Decoder) ObjBytes(f func(d *Decoder, key []byte) error) error { c, err = d.more() if err != nil { - return errors.Wrap(err, "next") + return errors.Wrap(err, `"," or "}" expected`) } for c == ',' { k, err := d.str(value{raw: isBuffer}) if err != nil { - return errors.Wrap(err, "str") + return errors.Wrap(err, "field name") } if err := d.consume(':'); err != nil { - return errors.Wrap(err, "field") + return errors.Wrap(err, `":" expected`) } // Check that value exists. 
if _, err = d.more(); err != nil { - return errors.Wrap(err, "more") + return err } d.unread() if err := f(d, k.buf); err != nil { return errors.Wrap(err, "callback") } if c, err = d.more(); err != nil { - return errors.Wrap(err, "next") + return err } } if c != '}' { - return errors.Wrap(badToken(c), "err") + err := badToken(c, d.offset()-1) + return errors.Wrap(err, `"}" expected`) } return d.decDepth() } diff --git a/dec_obj_iter.go b/dec_obj_iter.go index 77c4362..1e043bf 100644 --- a/dec_obj_iter.go +++ b/dec_obj_iter.go @@ -15,10 +15,10 @@ type ObjIter struct { // ObjIter creates new object iterator. func (d *Decoder) ObjIter() (ObjIter, error) { if err := d.consume('{'); err != nil { - return ObjIter{}, errors.Wrap(err, "start") + return ObjIter{}, errors.Wrap(err, `"{" expected`) } if err := d.incDepth(); err != nil { - return ObjIter{}, errors.Wrap(err, "inc") + return ObjIter{}, err } if _, err := d.more(); err != nil { return ObjIter{}, err @@ -53,7 +53,8 @@ func (i *ObjIter) Next() bool { } if i.comma { if c != ',' { - i.err = badToken(c) + err := badToken(c, dec.offset()-1) + i.err = errors.Wrap(err, `"," expected`) return false } } else { @@ -62,16 +63,17 @@ func (i *ObjIter) Next() bool { k, err := dec.str(value{raw: i.isBuffer}) if err != nil { - i.err = errors.Wrap(err, "str") + i.err = errors.Wrap(err, "field name") return false } if err := dec.consume(':'); err != nil { - i.err = errors.Wrap(err, "field") + i.err = errors.Wrap(err, `":" expected`) return false } // Skip whitespace. 
if _, err = dec.more(); err != nil { - i.err = errors.Wrap(err, "more") + err := badToken(c, dec.offset()-1) + i.err = errors.Wrap(err, `"," or "}" expected`) return false } dec.unread() diff --git a/dec_read.go b/dec_read.go index 0b9abe2..0c61d53 100644 --- a/dec_read.go +++ b/dec_read.go @@ -24,10 +24,10 @@ func (d *Decoder) consume(c byte) (err error) { for i, got := range buf { switch spaceSet[got] { default: - d.head += i + 1 if c != got { - return badToken(got) + return badToken(got, d.offset()+i) } + d.head += i + 1 return nil case 1: continue @@ -103,6 +103,7 @@ func (d *Decoder) read() error { return err } + d.streamOffset += d.tail d.head = 0 d.tail = n return nil @@ -125,6 +126,7 @@ func (d *Decoder) readAtLeast(min int) error { return err } + d.streamOffset += d.tail d.head = 0 d.tail = n return nil @@ -146,8 +148,8 @@ func (d *Decoder) readExact4(b *[4]byte) error { return nil } -func findInvalidToken4(buf [4]byte, mask uint32) error { +func findInvalidToken4(buf [4]byte, mask uint32, offset int) error { c := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 idx := bits.TrailingZeros32(c^mask) / 8 - return badToken(buf[idx]) + return badToken(buf[idx], offset+idx) } diff --git a/dec_skip.go b/dec_skip.go index ec384db..41ad52c 100644 --- a/dec_skip.go +++ b/dec_skip.go @@ -39,7 +39,7 @@ func (d *Decoder) Skip() error { } return nil default: - return badToken(c) + return badToken(c, d.offset()-1) } } @@ -82,7 +82,7 @@ func (d *Decoder) skipNumber() error { } // Character after '-' must be a digit. 
if skipNumberSet[c] != digitTag { - return badToken(c) + return badToken(c, d.offset()-1) } if c != '0' { break @@ -111,7 +111,7 @@ func (d *Decoder) skipNumber() error { case 'e', 'E': goto stateExp default: - return badToken(c) + return badToken(c, d.offset()) } } for { @@ -132,7 +132,7 @@ func (d *Decoder) skipNumber() error { d.head += i goto stateExp default: - return badToken(c) + return badToken(c, d.offset()+i) } } @@ -168,12 +168,12 @@ stateDot: switch c { case 'e', 'E': if last == '.' { - return badToken(c) + return badToken(c, d.offset()+i) } d.head += i goto stateExp default: - return badToken(c) + return badToken(c, d.offset()+i) } } @@ -207,10 +207,10 @@ stateExp: } // There must be a number after sign. if skipNumberSet[num] != digitTag { - return badToken(num) + return badToken(num, d.offset()-1) } } else { - return badToken(numOrSign) + return badToken(numOrSign, d.offset()-1) } } } @@ -221,7 +221,7 @@ stateExp: return nil } if skipNumberSet[c] == 0 { - return badToken(c) + return badToken(c, d.offset()+i) } } @@ -360,14 +360,14 @@ readTok: return err } if hexSet[h] == 0 { - return badToken(h) + return badToken(h, d.offset()-1) } } case 0: - return badToken(v) + return badToken(v, d.offset()-1) } case c < ' ': - return badToken(c) + return badToken(c, d.offset()+i) } goto readStr } @@ -382,7 +382,7 @@ func (d *Decoder) skipObj() error { c, err := d.more() if err != nil { - return errors.Wrap(err, "next") + return errors.Wrap(err, `'"' or "}" expected`) } switch c { case '}': @@ -390,25 +390,25 @@ func (d *Decoder) skipObj() error { case '"': d.unread() default: - return badToken(c) + return badToken(c, d.offset()-1) } for { if err := d.consume('"'); err != nil { - return err + return errors.Wrap(err, `'"' expected`) } if err := d.skipStr(); err != nil { return errors.Wrap(err, "read field name") } if err := d.consume(':'); err != nil { - return errors.Wrap(err, "field") + return errors.Wrap(err, `":" expected`) } if err := d.Skip(); err != nil { 
return err } c, err := d.more() if err != nil { - return errors.Wrap(err, "read comma") + return errors.Wrap(err, `"," or "}" expected`) } switch c { case ',': @@ -416,7 +416,7 @@ func (d *Decoder) skipObj() error { case '}': return d.decDepth() default: - return badToken(c) + return badToken(c, d.offset()-1) } } } @@ -431,7 +431,7 @@ func (d *Decoder) skipArr() error { c, err := d.more() if err != nil { - return errors.Wrap(err, "next") + return errors.Wrap(err, `value or "]" expected`) } if c == ']' { return d.decDepth() @@ -444,7 +444,7 @@ func (d *Decoder) skipArr() error { } c, err := d.more() if err != nil { - return errors.Wrap(err, "read comma") + return errors.Wrap(err, `"," or "]" expected`) } switch c { case ',': @@ -452,7 +452,7 @@ func (d *Decoder) skipArr() error { case ']': return d.decDepth() default: - return badToken(c) + return badToken(c, d.offset()-1) } } } diff --git a/dec_str.go b/dec_str.go index 6cff68d..4c36295 100644 --- a/dec_str.go +++ b/dec_str.go @@ -1,7 +1,6 @@ package jx import ( - "fmt" "io" "unicode/utf16" "unicode/utf8" @@ -34,22 +33,9 @@ func (v value) rune(r rune) value { } } -// badTokenErr means that Token was unexpected while decoding. -type badTokenErr struct { - Token byte -} - -func (e badTokenErr) Error() string { - return fmt.Sprintf("unexpected byte %d '%s'", e.Token, []byte{e.Token}) -} - -func badToken(c byte) error { - return badTokenErr{Token: c} -} - func (d *Decoder) str(v value) (value, error) { if err := d.consume('"'); err != nil { - return value{}, errors.Wrap(err, "start") + return value{}, err } var ( c byte @@ -135,7 +121,7 @@ readTok: // We need a copy anyway, because string is escaped. return d.strSlow(value{buf: append(v.buf, str...)}) default: - return v, badToken(c) + return v, badToken(c, d.offset()+i) } } @@ -226,14 +212,14 @@ readTok: v.buf = append(v.buf, str...) 
c, err := d.byte() if err != nil { - return value{}, errors.Wrap(err, "next") + return value{}, err } v, err = d.escapedChar(v, c) if err != nil { return v, errors.Wrap(err, "escape") } default: - return v, badToken(c) + return v, badToken(c, d.offset()-1) } goto readStr } @@ -297,20 +283,24 @@ func (d *Decoder) escapedChar(v value, c byte) (value, error) { v = v.rune(r1) } case 0: - return v, errors.Wrap(badToken(c), "bad escape: %w") + err := badToken(c, d.offset()-1) + return v, errors.Wrap(err, "bad escape") } return v, nil } func (d *Decoder) readU4() (v rune, _ error) { - var b [4]byte + var ( + offset = d.offset() + b [4]byte + ) if err := d.readExact4(&b); err != nil { return 0, err } - for _, c := range b { + for i, c := range b { val := hexSet[c] if val == 0 { - return 0, badToken(c) + return 0, badToken(c, offset+i) } v = v*16 + rune(val-1) } diff --git a/num.go b/num.go index d62664c..e9430ed 100644 --- a/num.go +++ b/num.go @@ -40,7 +40,7 @@ func (n Num) Str() bool { func (n Num) floatAsInt() error { // Allow decoding floats with zero fractional, like 1.0 as 1. var dot bool - for _, c := range n { + for i, c := range n { if c == '.' 
{ dot = true continue @@ -51,7 +51,7 @@ func (n Num) floatAsInt() error { switch c { case '0', '"': // ok default: - return errors.Wrap(badToken(c), "non-zero fractional part") + return errors.Errorf("non-zero fractional part %q at %d", c, i) } } return nil From 9b3e64df1dd80a00c79077046d9f5f5597dba340 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Tue, 17 Jan 2023 01:09:32 +0300 Subject: [PATCH 02/10] test: check that offset is correct --- dec_error_test.go | 16 ++++++++++++++++ dec_skip_cases_test.go | 18 ++++++++++++++++-- dec_str_test.go | 8 -------- null_test.go | 2 +- 4 files changed, 33 insertions(+), 11 deletions(-) create mode 100644 dec_error_test.go diff --git a/dec_error_test.go b/dec_error_test.go new file mode 100644 index 0000000..37e87fe --- /dev/null +++ b/dec_error_test.go @@ -0,0 +1,16 @@ +package jx + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func Test_badTokenErr_Error(t *testing.T) { + e := &badTokenErr{ + Token: 'c', + Offset: 10, + } + s := error(e).Error() + require.Equal(t, "unexpected byte 99 'c' at 10", s) +} diff --git a/dec_skip_cases_test.go b/dec_skip_cases_test.go index 53c2d95..2d9cdd4 100644 --- a/dec_skip_cases_test.go +++ b/dec_skip_cases_test.go @@ -8,6 +8,7 @@ import ( "strings" "testing" + "github.com/go-faster/errors" "github.com/stretchr/testify/require" ) @@ -477,6 +478,7 @@ func TestDecoder_Skip(t *testing.T) { }) for _, testCase := range testCases { + testCase := testCase valType := reflect.TypeOf(testCase.ptr).Elem() t.Run(valType.Kind().String(), func(t *testing.T) { for inputIdx, input := range testCase.inputs { @@ -494,7 +496,7 @@ func TestDecoder_Skip(t *testing.T) { should.NoError(iter.Skip()) should.ErrorIs(iter.Null(), io.ErrUnexpectedEOF) } else { - should.Error(func() error { + err := func() error { if err := iter.Skip(); err != nil { return err } @@ -502,7 +504,19 @@ func TestDecoder_Skip(t *testing.T) { return err } return nil - }()) + }() + should.Error(err) + if be, ok := 
errors.Into[*badTokenErr](err); ok { + offset := be.Offset + should.True(offset >= 0) + should.True(offset < len(input)) + should.Equal(be.Token, input[offset]) + + if se, ok := errors.Into[*json.SyntaxError](stdErr); ok { + expected, got := input[se.Offset-1], input[offset] + should.Equal(expected, got) + } + } } } t.Run(fmt.Sprintf("Test%d", inputIdx), testBufferReader(input, cb)) diff --git a/dec_str_test.go b/dec_str_test.go index b8eacd9..d8e62a6 100644 --- a/dec_str_test.go +++ b/dec_str_test.go @@ -26,14 +26,6 @@ func TestDecoder_StrAppend(t *testing.T) { require.ErrorIs(t, err, io.ErrUnexpectedEOF) } -func TestUnexpectedTokenErr_Error(t *testing.T) { - e := &badTokenErr{ - Token: 'c', - } - s := error(e).Error() - require.Equal(t, "unexpected byte 99 'c'", s) -} - func TestDecoder_Str(t *testing.T) { runTestCases(t, testStrings, func(t *testing.T, d *Decoder) error { _, err := d.Str() diff --git a/null_test.go b/null_test.go index 6a633a1..fff6394 100644 --- a/null_test.go +++ b/null_test.go @@ -64,7 +64,7 @@ func TestNullError(t *testing.T) { continue } b[i] = c - var token badTokenErr + var token *badTokenErr a.ErrorAs(DecodeBytes(b[:]).Null(), &token) a.Equalf(c, token.Token, "%c != %c (%q)", c, token.Token, b) } From fd89fc87b9e43d08812e087581377cb073cf2bb8 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Tue, 17 Jan 2023 03:44:12 +0300 Subject: [PATCH 03/10] chore: make linter happy --- dec_num.go | 2 +- dec_skip.go | 2 +- dec_skip_cases_test.go | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dec_num.go b/dec_num.go index 2b1b7bd..4e6fbd0 100644 --- a/dec_num.go +++ b/dec_num.go @@ -45,7 +45,7 @@ func (d *Decoder) num(v Num, forceAppend bool) (Num, error) { return Num{}, errors.Wrap(err, "skip number") } default: - return nil, badToken(c, offset) + return nil, badToken(c, offset) } } diff --git a/dec_skip.go b/dec_skip.go index 41ad52c..4918ef5 100644 --- a/dec_skip.go +++ b/dec_skip.go @@ -354,7 +354,7 @@ readTok: } switch 
escapedStrSet[v] { case 'u': - for i := 0; i < 4; i++ { + for range [4]struct{}{} { h, err := d.byte() if err != nil { return err diff --git a/dec_skip_cases_test.go b/dec_skip_cases_test.go index 2d9cdd4..dae21d5 100644 --- a/dec_skip_cases_test.go +++ b/dec_skip_cases_test.go @@ -8,8 +8,9 @@ import ( "strings" "testing" - "github.com/go-faster/errors" "github.com/stretchr/testify/require" + + "github.com/go-faster/errors" ) var testBools = []string{ From e0394e08ec1af7f836c700d54632c343efc91bad Mon Sep 17 00:00:00 2001 From: tdakkota Date: Wed, 18 Jan 2023 03:38:38 +0300 Subject: [PATCH 04/10] fix: skip space before integer decoding --- dec_float_test.go | 8 ++--- dec_int.go | 78 ++++++++++++++++++++++++++--------------------- dec_int_test.go | 67 ++++++++++++++++++++++++++++++++++++++++ int_test.go | 10 +++--- 4 files changed, 119 insertions(+), 44 deletions(-) diff --git a/dec_float_test.go b/dec_float_test.go index 773ade4..42d8d4d 100644 --- a/dec_float_test.go +++ b/dec_float_test.go @@ -10,7 +10,7 @@ import ( "github.com/stretchr/testify/require" ) -func decodeStr(t *testing.T, s string, f func(d *Decoder)) { +func decodeStr(t *testing.T, s string, f func(t *testing.T, d *Decoder)) { t.Helper() for _, d := range []struct { Name string @@ -36,7 +36,7 @@ func decodeStr(t *testing.T, s string, f func(d *Decoder)) { t.Run(d.Name, func(t *testing.T) { t.Helper() dec := d.Fn() - f(dec) + f(t, dec) }) } } @@ -117,14 +117,14 @@ func TestDecoder_Float64(t *testing.T) { require.NoError(t, err) }) t.Run("32", func(t *testing.T) { - decodeStr(t, tc.String, func(d *Decoder) { + decodeStr(t, tc.String, func(t *testing.T, d *Decoder) { v, err := d.Float32() require.NoError(t, err) require.InEpsilonf(t, tc.Value, v, epsilon, "%v != %v", tc.Value, v) }) }) t.Run("64", func(t *testing.T) { - decodeStr(t, tc.String, func(d *Decoder) { + decodeStr(t, tc.String, func(t *testing.T, d *Decoder) { v, err := d.Float64() require.NoError(t, err) require.InEpsilonf(t, tc.Value, v, 
epsilon, "%v != %v", tc.Value, v) diff --git a/dec_int.go b/dec_int.go index dc574cf..ec8274f 100644 --- a/dec_int.go +++ b/dec_int.go @@ -75,12 +75,16 @@ func (d *Decoder) UInt() (uint, error) { // Int8 reads int8. func (d *Decoder) Int8() (int8, error) { - c, err := d.byte() + c, err := d.more() if err != nil { return 0, err } if c == '-' { - val, err := d.readUInt8() + c, err := d.byte() + if err != nil { + return 0, err + } + val, err := d.readUInt8(c) if err != nil { return 0, err } @@ -89,8 +93,7 @@ func (d *Decoder) Int8() (int8, error) { } return -int8(val), nil } - d.unread() - val, err := d.readUInt8() + val, err := d.readUInt8(c) if err != nil { return 0, err } @@ -102,20 +105,20 @@ func (d *Decoder) Int8() (int8, error) { // UInt8 reads uint8. func (d *Decoder) UInt8() (uint8, error) { - return d.readUInt8() -} - -func (d *Decoder) readUInt8() (uint8, error) { - c, err := d.byte() + c, err := d.more() if err != nil { return 0, err } + return d.readUInt8(c) +} + +func (d *Decoder) readUInt8(c byte) (uint8, error) { ind := intDigits[c] if ind == 0 { return 0, nil } if ind == invalidCharForNumber { - return 0, errors.Wrap(err, "bad token") + return 0, badToken(c, d.offset()-1) } value := uint8(ind) if d.tail-d.head > 10 { @@ -169,12 +172,16 @@ func (d *Decoder) readUInt8() (uint8, error) { // Int16 reads int16. func (d *Decoder) Int16() (int16, error) { - c, err := d.byte() + c, err := d.more() if err != nil { return 0, err } if c == '-' { - val, err := d.readUInt16() + c, err := d.byte() + if err != nil { + return 0, err + } + val, err := d.readUInt16(c) if err != nil { return 0, err } @@ -183,8 +190,7 @@ func (d *Decoder) Int16() (int16, error) { } return -int16(val), nil } - d.unread() - val, err := d.readUInt16() + val, err := d.readUInt16(c) if err != nil { return 0, err } @@ -196,20 +202,20 @@ func (d *Decoder) Int16() (int16, error) { // UInt16 reads uint16. 
func (d *Decoder) UInt16() (uint16, error) { - return d.readUInt16() -} - -func (d *Decoder) readUInt16() (uint16, error) { - c, err := d.byte() + c, err := d.more() if err != nil { return 0, err } + return d.readUInt16(c) +} + +func (d *Decoder) readUInt16(c byte) (uint16, error) { ind := intDigits[c] if ind == 0 { return 0, nil } if ind == invalidCharForNumber { - return 0, errors.Wrap(err, "bad token") + return 0, badToken(c, d.offset()-1) } value := uint16(ind) if d.tail-d.head > 10 { @@ -275,12 +281,16 @@ func (d *Decoder) readUInt16() (uint16, error) { // Int32 reads int32. func (d *Decoder) Int32() (int32, error) { - c, err := d.byte() + c, err := d.more() if err != nil { return 0, err } if c == '-' { - val, err := d.readUInt32() + c, err := d.byte() + if err != nil { + return 0, err + } + val, err := d.readUInt32(c) if err != nil { return 0, err } @@ -289,8 +299,7 @@ func (d *Decoder) Int32() (int32, error) { } return -int32(val), nil } - d.unread() - val, err := d.readUInt32() + val, err := d.readUInt32(c) if err != nil { return 0, err } @@ -302,20 +311,20 @@ func (d *Decoder) Int32() (int32, error) { // UInt32 reads uint32. func (d *Decoder) UInt32() (uint32, error) { - return d.readUInt32() -} - -func (d *Decoder) readUInt32() (uint32, error) { - c, err := d.byte() + c, err := d.more() if err != nil { return 0, err } + return d.readUInt32(c) +} + +func (d *Decoder) readUInt32(c byte) (uint32, error) { ind := intDigits[c] if ind == 0 { return 0, nil } if ind == invalidCharForNumber { - return 0, errors.Wrap(err, "bad token") + return 0, badToken(c, d.offset()-1) } value := uint32(ind) if d.tail-d.head > 10 { @@ -399,12 +408,12 @@ func (d *Decoder) readUInt32() (uint32, error) { // Int64 reads int64. 
func (d *Decoder) Int64() (int64, error) { - c, err := d.byte() + c, err := d.more() if err != nil { return 0, err } if c == '-' { - c, err := d.next() + c, err := d.byte() if err != nil { return 0, err } @@ -429,7 +438,7 @@ func (d *Decoder) Int64() (int64, error) { // UInt64 reads uint64. func (d *Decoder) UInt64() (uint64, error) { - c, err := d.byte() + c, err := d.more() if err != nil { return 0, err } @@ -442,8 +451,7 @@ func (d *Decoder) readUInt64(c byte) (uint64, error) { return 0, nil // single zero } if ind == invalidCharForNumber { - err := badToken(c, d.offset()-1) - return 0, errors.Wrap(err, "invalid number") + return 0, badToken(c, d.offset()-1) } value := uint64(ind) if d.tail-d.head > 10 { diff --git a/dec_int_test.go b/dec_int_test.go index e4d947d..d5d5a98 100644 --- a/dec_int_test.go +++ b/dec_int_test.go @@ -1,6 +1,7 @@ package jx import ( + "fmt" "testing" "github.com/stretchr/testify/require" @@ -102,3 +103,69 @@ func TestDecoderIntError(t *testing.T) { require.ErrorIs(t, err, r.Err()) }) } + +func intDecoderOnlyError[T any](fn func(*Decoder) (T, error)) func(*Decoder) error { + return func(d *Decoder) error { + _, err := fn(d) + return err + } +} + +func TestDecoderIntUnexpectedSpace(t *testing.T) { + type intFunc struct { + name string + fn func(*Decoder) error + } + signed := []intFunc{ + {"Int", intDecoderOnlyError((*Decoder).Int)}, + {"Int8", intDecoderOnlyError((*Decoder).Int8)}, + {"Int16", intDecoderOnlyError((*Decoder).Int16)}, + {"Int32", intDecoderOnlyError((*Decoder).Int32)}, + {"Int64", intDecoderOnlyError((*Decoder).Int64)}, + } + unsigned := []intFunc{ + {"UInt", intDecoderOnlyError((*Decoder).UInt)}, + {"UInt8", intDecoderOnlyError((*Decoder).UInt8)}, + {"UInt16", intDecoderOnlyError((*Decoder).UInt16)}, + {"UInt32", intDecoderOnlyError((*Decoder).UInt32)}, + {"UInt64", intDecoderOnlyError((*Decoder).UInt64)}, + } + + tests := []struct { + input string + unsigned bool + wantErr bool + }{ + {" 10", true, false}, + {" 10", 
true, false}, + {" -10", false, false}, + + {"- 10", false, true}, + } + + for i, tt := range tests { + tt := tt + t.Run(fmt.Sprintf("Test%d", i+1), func(t *testing.T) { + check := func(fns []intFunc) { + for _, intFn := range fns { + intFn := intFn + t.Run(intFn.name, func(t *testing.T) { + decodeStr(t, tt.input, func(t *testing.T, d *Decoder) { + err := intFn.fn(d) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + }) + }) + } + } + + check(signed) + if tt.unsigned { + check(unsigned) + } + }) + } +} diff --git a/int_test.go b/int_test.go index 4d765cf..2405e2b 100644 --- a/int_test.go +++ b/int_test.go @@ -36,35 +36,35 @@ func TestDecoderIntNumbers(t *testing.T) { } s := string(data) t.Run("32", func(t *testing.T) { - decodeStr(t, s, func(d *Decoder) { + decodeStr(t, s, func(t *testing.T, d *Decoder) { got, err := d.Int32() require.NoError(t, err) require.Equal(t, int32(v), got) }) }) t.Run("64", func(t *testing.T) { - decodeStr(t, s, func(d *Decoder) { + decodeStr(t, s, func(t *testing.T, d *Decoder) { got, err := d.Int64() require.NoError(t, err) require.Equal(t, int64(v), got) }) }) t.Run("int", func(t *testing.T) { - decodeStr(t, s, func(d *Decoder) { + decodeStr(t, s, func(t *testing.T, d *Decoder) { got, err := d.Int() require.NoError(t, err) require.Equal(t, v, got) }) }) t.Run("uint", func(t *testing.T) { - decodeStr(t, s, func(d *Decoder) { + decodeStr(t, s, func(t *testing.T, d *Decoder) { got, err := d.UInt() require.NoError(t, err) require.Equal(t, uint(v), got) }) }) t.Run("uint32", func(t *testing.T) { - decodeStr(t, s, func(d *Decoder) { + decodeStr(t, s, func(t *testing.T, d *Decoder) { got, err := d.UInt32() require.NoError(t, err) require.Equal(t, uint32(v), got) From 514cd6a88a6b88ab1b94865839ccfbf75a9100c7 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Wed, 18 Jan 2023 04:41:22 +0300 Subject: [PATCH 05/10] fix: check integer for digit after leading zero --- dec_int.go | 48 
++++++++++++++++++++++++++++++++++++------------ dec_int_test.go | 26 ++++++++++++++++---------- dec_read.go | 11 +++++++++++ 3 files changed, 63 insertions(+), 22 deletions(-) diff --git a/dec_int.go b/dec_int.go index ec8274f..38dafa2 100644 --- a/dec_int.go +++ b/dec_int.go @@ -114,10 +114,16 @@ func (d *Decoder) UInt8() (uint8, error) { func (d *Decoder) readUInt8(c byte) (uint8, error) { ind := intDigits[c] - if ind == 0 { + switch ind { + case 0: + // Check that next byte is not a digit. + c, err := d.peek() + if err == nil && intDigits[c] != invalidCharForNumber { + err := badToken(c, d.offset()) + return 0, errors.Wrap(err, "digit after leading zero") + } return 0, nil - } - if ind == invalidCharForNumber { + case invalidCharForNumber: return 0, badToken(c, d.offset()-1) } value := uint8(ind) @@ -211,10 +217,16 @@ func (d *Decoder) UInt16() (uint16, error) { func (d *Decoder) readUInt16(c byte) (uint16, error) { ind := intDigits[c] - if ind == 0 { + switch ind { + case 0: + // Check that next byte is not a digit. + c, err := d.peek() + if err == nil && intDigits[c] != invalidCharForNumber { + err := badToken(c, d.offset()) + return 0, errors.Wrap(err, "digit after leading zero") + } return 0, nil - } - if ind == invalidCharForNumber { + case invalidCharForNumber: return 0, badToken(c, d.offset()-1) } value := uint16(ind) @@ -320,10 +332,16 @@ func (d *Decoder) UInt32() (uint32, error) { func (d *Decoder) readUInt32(c byte) (uint32, error) { ind := intDigits[c] - if ind == 0 { + switch ind { + case 0: + // Check that next byte is not a digit. 
+ c, err := d.peek() + if err == nil && intDigits[c] != invalidCharForNumber { + err := badToken(c, d.offset()) + return 0, errors.Wrap(err, "digit after leading zero") + } return 0, nil - } - if ind == invalidCharForNumber { + case invalidCharForNumber: return 0, badToken(c, d.offset()-1) } value := uint32(ind) @@ -447,10 +465,16 @@ func (d *Decoder) UInt64() (uint64, error) { func (d *Decoder) readUInt64(c byte) (uint64, error) { ind := intDigits[c] - if ind == 0 { + switch ind { + case 0: + // Check that next byte is not a digit. + c, err := d.peek() + if err == nil && intDigits[c] != invalidCharForNumber { + err := badToken(c, d.offset()) + return 0, errors.Wrap(err, "digit after leading zero") + } return 0, nil // single zero - } - if ind == invalidCharForNumber { + case invalidCharForNumber: return 0, badToken(c, d.offset()-1) } value := uint64(ind) diff --git a/dec_int_test.go b/dec_int_test.go index d5d5a98..3da6dc4 100644 --- a/dec_int_test.go +++ b/dec_int_test.go @@ -111,7 +111,7 @@ func intDecoderOnlyError[T any](fn func(*Decoder) (T, error)) func(*Decoder) err } } -func TestDecoderIntUnexpectedSpace(t *testing.T) { +func TestDecoderIntUnexpectedChar(t *testing.T) { type intFunc struct { name string fn func(*Decoder) error @@ -132,15 +132,21 @@ func TestDecoderIntUnexpectedSpace(t *testing.T) { } tests := []struct { - input string - unsigned bool - wantErr bool + input string + unsigned bool + errString string }{ - {" 10", true, false}, - {" 10", true, false}, - {" -10", false, false}, + // Leading space. + {" 10", true, ""}, + {" 10", true, ""}, + {" -10", false, ""}, - {"- 10", false, true}, + // Space in the middle. + {"- 10", false, "unexpected byte 32 ' ' at 1"}, + + // Digit after leading zero. 
+ {"00", true, "digit after leading zero: unexpected byte 48 '0' at 1"}, + {"01", true, "digit after leading zero: unexpected byte 49 '1' at 1"}, } for i, tt := range tests { @@ -152,8 +158,8 @@ func TestDecoderIntUnexpectedSpace(t *testing.T) { t.Run(intFn.name, func(t *testing.T) { decodeStr(t, tt.input, func(t *testing.T, d *Decoder) { err := intFn.fn(d) - if tt.wantErr { - require.Error(t, err) + if e := tt.errString; e != "" { + require.EqualError(t, err, e) return } require.NoError(t, err) diff --git a/dec_read.go b/dec_read.go index 0c61d53..2640caf 100644 --- a/dec_read.go +++ b/dec_read.go @@ -70,6 +70,17 @@ func (d *Decoder) next() (byte, error) { } } +// peek returns next byte without advancing. +func (d *Decoder) peek() (byte, error) { + if d.head == d.tail { + if err := d.read(); err != nil { + return 0, err + } + } + c := d.buf[d.head] + return c, nil +} + func (d *Decoder) byte() (byte, error) { if d.head == d.tail { err := d.read() From 1eced1bcffe4f618b9ac1d44c460f21dd5d43356 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Thu, 19 Jan 2023 04:32:52 +0300 Subject: [PATCH 06/10] feat: generate int decoders --- dec_int.go | 510 ------------------ generate.go | 3 + tools/mkencint/main.go | 126 ----- tools/mkint/decode.tmpl | 157 ++++++ .../{mkencint/gen.tmpl => mkint/encode.tmpl} | 59 +- tools/mkint/main.go | 163 ++++++ w_int.go | 2 - 7 files changed, 353 insertions(+), 667 deletions(-) create mode 100644 generate.go delete mode 100644 tools/mkencint/main.go create mode 100644 tools/mkint/decode.tmpl rename tools/{mkencint/gen.tmpl => mkint/encode.tmpl} (50%) create mode 100644 tools/mkint/main.go diff --git a/dec_int.go b/dec_int.go index 38dafa2..41ed10d 100644 --- a/dec_int.go +++ b/dec_int.go @@ -1,34 +1,9 @@ package jx import ( - "io" - "math" "strconv" - - "github.com/go-faster/errors" -) - -var ( - intDigits [256]int8 - errOverflow = strconv.ErrRange -) - -const ( - uint8SafeToMultiply10 = uint8(0xff)/10 - 1 - uint16SafeToMultiply10 = 
uint16(0xffff)/10 - 1 - uint32SafeToMultiply10 = uint32(0xffffffff)/10 - 1 - uint64SafeToMultiple10 = uint64(0xffffffffffffffff)/10 - 1 ) -func init() { - for i := 0; i < len(intDigits); i++ { - intDigits[i] = invalidCharForNumber - } - for i := int8('0'); i <= int8('9'); i++ { - intDigits[i] = i - int8('0') - } -} - func (d *Decoder) int(size int) (int, error) { switch size { case 8: @@ -72,488 +47,3 @@ func (d *Decoder) uint(size int) (uint, error) { func (d *Decoder) UInt() (uint, error) { return d.uint(strconv.IntSize) } - -// Int8 reads int8. -func (d *Decoder) Int8() (int8, error) { - c, err := d.more() - if err != nil { - return 0, err - } - if c == '-' { - c, err := d.byte() - if err != nil { - return 0, err - } - val, err := d.readUInt8(c) - if err != nil { - return 0, err - } - if val > math.MaxInt8+1 { - return 0, errOverflow - } - return -int8(val), nil - } - val, err := d.readUInt8(c) - if err != nil { - return 0, err - } - if val > math.MaxInt8 { - return 0, errOverflow - } - return int8(val), nil -} - -// UInt8 reads uint8. -func (d *Decoder) UInt8() (uint8, error) { - c, err := d.more() - if err != nil { - return 0, err - } - return d.readUInt8(c) -} - -func (d *Decoder) readUInt8(c byte) (uint8, error) { - ind := intDigits[c] - switch ind { - case 0: - // Check that next byte is not a digit. 
- c, err := d.peek() - if err == nil && intDigits[c] != invalidCharForNumber { - err := badToken(c, d.offset()) - return 0, errors.Wrap(err, "digit after leading zero") - } - return 0, nil - case invalidCharForNumber: - return 0, badToken(c, d.offset()-1) - } - value := uint8(ind) - if d.tail-d.head > 10 { - i := d.head - ind2 := intDigits[d.buf[i]] - if ind2 == invalidCharForNumber { - d.head = i - return value, nil - } - i++ - ind3 := intDigits[d.buf[i]] - if ind3 == invalidCharForNumber { - d.head = i - return value*10 + uint8(ind2), nil - } - i++ - ind4 := intDigits[d.buf[i]] - value = value*100 + uint8(ind2)*10 + uint8(ind3) - d.head = i - if ind4 == invalidCharForNumber { - return value, nil - } - } - for { - buf := d.buf[d.head:d.tail] - for i, c := range buf { - ind = intDigits[c] - if ind == invalidCharForNumber { - d.head += i - return value, nil - } - if value > uint8SafeToMultiply10 { - value2 := (value << 3) + (value << 1) + uint8(ind) - if value2 < value { - return 0, errOverflow - } - value = value2 - continue - } - value = (value << 3) + (value << 1) + uint8(ind) - } - err := d.read() - if err == io.EOF { - return value, nil - } - if err != nil { - return 0, err - } - } -} - -// Int16 reads int16. -func (d *Decoder) Int16() (int16, error) { - c, err := d.more() - if err != nil { - return 0, err - } - if c == '-' { - c, err := d.byte() - if err != nil { - return 0, err - } - val, err := d.readUInt16(c) - if err != nil { - return 0, err - } - if val > math.MaxInt16+1 { - return 0, errOverflow - } - return -int16(val), nil - } - val, err := d.readUInt16(c) - if err != nil { - return 0, err - } - if val > math.MaxInt16 { - return 0, errOverflow - } - return int16(val), nil -} - -// UInt16 reads uint16. 
-func (d *Decoder) UInt16() (uint16, error) { - c, err := d.more() - if err != nil { - return 0, err - } - return d.readUInt16(c) -} - -func (d *Decoder) readUInt16(c byte) (uint16, error) { - ind := intDigits[c] - switch ind { - case 0: - // Check that next byte is not a digit. - c, err := d.peek() - if err == nil && intDigits[c] != invalidCharForNumber { - err := badToken(c, d.offset()) - return 0, errors.Wrap(err, "digit after leading zero") - } - return 0, nil - case invalidCharForNumber: - return 0, badToken(c, d.offset()-1) - } - value := uint16(ind) - if d.tail-d.head > 10 { - i := d.head - ind2 := intDigits[d.buf[i]] - if ind2 == invalidCharForNumber { - d.head = i - return value, nil - } - i++ - ind3 := intDigits[d.buf[i]] - if ind3 == invalidCharForNumber { - d.head = i - return value*10 + uint16(ind2), nil - } - i++ - ind4 := intDigits[d.buf[i]] - if ind4 == invalidCharForNumber { - d.head = i - return value*100 + uint16(ind2)*10 + uint16(ind3), nil - } - i++ - ind5 := intDigits[d.buf[i]] - if ind5 == invalidCharForNumber { - d.head = i - return value*1000 + uint16(ind2)*100 + uint16(ind3)*10 + uint16(ind4), nil - } - i++ - ind6 := intDigits[d.buf[i]] - value = value*10000 + uint16(ind2)*1000 + uint16(ind3)*100 + uint16(ind4)*10 + uint16(ind5) - d.head = i - if ind6 == invalidCharForNumber { - return value, nil - } - } - for { - buf := d.buf[d.head:d.tail] - for i, c := range buf { - ind = intDigits[c] - if ind == invalidCharForNumber { - d.head += i - return value, nil - } - if value > uint16SafeToMultiply10 { - value2 := (value << 3) + (value << 1) + uint16(ind) - if value2 < value { - return 0, errOverflow - } - value = value2 - continue - } - value = (value << 3) + (value << 1) + uint16(ind) - } - err := d.read() - if err == io.EOF { - return value, nil - } - if err != nil { - return 0, err - } - } -} - -// Int32 reads int32. 
-func (d *Decoder) Int32() (int32, error) { - c, err := d.more() - if err != nil { - return 0, err - } - if c == '-' { - c, err := d.byte() - if err != nil { - return 0, err - } - val, err := d.readUInt32(c) - if err != nil { - return 0, err - } - if val > math.MaxInt32+1 { - return 0, errOverflow - } - return -int32(val), nil - } - val, err := d.readUInt32(c) - if err != nil { - return 0, err - } - if val > math.MaxInt32 { - return 0, errOverflow - } - return int32(val), nil -} - -// UInt32 reads uint32. -func (d *Decoder) UInt32() (uint32, error) { - c, err := d.more() - if err != nil { - return 0, err - } - return d.readUInt32(c) -} - -func (d *Decoder) readUInt32(c byte) (uint32, error) { - ind := intDigits[c] - switch ind { - case 0: - // Check that next byte is not a digit. - c, err := d.peek() - if err == nil && intDigits[c] != invalidCharForNumber { - err := badToken(c, d.offset()) - return 0, errors.Wrap(err, "digit after leading zero") - } - return 0, nil - case invalidCharForNumber: - return 0, badToken(c, d.offset()-1) - } - value := uint32(ind) - if d.tail-d.head > 10 { - i := d.head - ind2 := intDigits[d.buf[i]] - if ind2 == invalidCharForNumber { - d.head = i - return value, nil - } - i++ - ind3 := intDigits[d.buf[i]] - if ind3 == invalidCharForNumber { - d.head = i - return value*10 + uint32(ind2), nil - } - i++ - ind4 := intDigits[d.buf[i]] - if ind4 == invalidCharForNumber { - d.head = i - return value*100 + uint32(ind2)*10 + uint32(ind3), nil - } - i++ - ind5 := intDigits[d.buf[i]] - if ind5 == invalidCharForNumber { - d.head = i - return value*1000 + uint32(ind2)*100 + uint32(ind3)*10 + uint32(ind4), nil - } - i++ - ind6 := intDigits[d.buf[i]] - if ind6 == invalidCharForNumber { - d.head = i - return value*10000 + uint32(ind2)*1000 + uint32(ind3)*100 + uint32(ind4)*10 + uint32(ind5), nil - } - i++ - ind7 := intDigits[d.buf[i]] - if ind7 == invalidCharForNumber { - d.head = i - return value*100000 + uint32(ind2)*10000 + uint32(ind3)*1000 + 
uint32(ind4)*100 + uint32(ind5)*10 + uint32(ind6), nil - } - i++ - ind8 := intDigits[d.buf[i]] - if ind8 == invalidCharForNumber { - d.head = i - return value*1000000 + uint32(ind2)*100000 + uint32(ind3)*10000 + uint32(ind4)*1000 + uint32(ind5)*100 + uint32(ind6)*10 + uint32(ind7), nil - } - i++ - ind9 := intDigits[d.buf[i]] - value = value*10000000 + uint32(ind2)*1000000 + uint32(ind3)*100000 + uint32(ind4)*10000 + uint32(ind5)*1000 + uint32(ind6)*100 + uint32(ind7)*10 + uint32(ind8) - d.head = i - if ind9 == invalidCharForNumber { - return value, nil - } - } - for { - buf := d.buf[d.head:d.tail] - for i, c := range buf { - ind = intDigits[c] - if ind == invalidCharForNumber { - d.head += i - return value, nil - } - if value > uint32SafeToMultiply10 { - value2 := (value << 3) + (value << 1) + uint32(ind) - if value2 < value { - return 0, errOverflow - } - value = value2 - continue - } - value = (value << 3) + (value << 1) + uint32(ind) - } - err := d.read() - if err == io.EOF { - return value, nil - } - if err != nil { - return 0, err - } - } -} - -// Int64 reads int64. -func (d *Decoder) Int64() (int64, error) { - c, err := d.more() - if err != nil { - return 0, err - } - if c == '-' { - c, err := d.byte() - if err != nil { - return 0, err - } - val, err := d.readUInt64(c) - if err != nil { - return 0, err - } - if val > math.MaxInt64+1 { - return 0, errors.Errorf("%d overflows", val) - } - return -int64(val), nil - } - val, err := d.readUInt64(c) - if err != nil { - return 0, err - } - if val > math.MaxInt64 { - return 0, errors.Errorf("%d overflows", val) - } - return int64(val), nil -} - -// UInt64 reads uint64. -func (d *Decoder) UInt64() (uint64, error) { - c, err := d.more() - if err != nil { - return 0, err - } - return d.readUInt64(c) -} - -func (d *Decoder) readUInt64(c byte) (uint64, error) { - ind := intDigits[c] - switch ind { - case 0: - // Check that next byte is not a digit. 
- c, err := d.peek() - if err == nil && intDigits[c] != invalidCharForNumber { - err := badToken(c, d.offset()) - return 0, errors.Wrap(err, "digit after leading zero") - } - return 0, nil // single zero - case invalidCharForNumber: - return 0, badToken(c, d.offset()-1) - } - value := uint64(ind) - if d.tail-d.head > 10 { - i := d.head - ind2 := intDigits[d.buf[i]] - if ind2 == invalidCharForNumber { - d.head = i - return value, nil - } - i++ - ind3 := intDigits[d.buf[i]] - if ind3 == invalidCharForNumber { - d.head = i - return value*10 + uint64(ind2), nil - } - i++ - ind4 := intDigits[d.buf[i]] - if ind4 == invalidCharForNumber { - d.head = i - return value*100 + uint64(ind2)*10 + uint64(ind3), nil - } - i++ - ind5 := intDigits[d.buf[i]] - if ind5 == invalidCharForNumber { - d.head = i - return value*1000 + uint64(ind2)*100 + uint64(ind3)*10 + uint64(ind4), nil - } - i++ - ind6 := intDigits[d.buf[i]] - if ind6 == invalidCharForNumber { - d.head = i - return value*10000 + uint64(ind2)*1000 + uint64(ind3)*100 + uint64(ind4)*10 + uint64(ind5), nil - } - i++ - ind7 := intDigits[d.buf[i]] - if ind7 == invalidCharForNumber { - d.head = i - return value*100000 + uint64(ind2)*10000 + uint64(ind3)*1000 + uint64(ind4)*100 + uint64(ind5)*10 + uint64(ind6), nil - } - i++ - ind8 := intDigits[d.buf[i]] - if ind8 == invalidCharForNumber { - d.head = i - return value*1000000 + uint64(ind2)*100000 + uint64(ind3)*10000 + uint64(ind4)*1000 + uint64(ind5)*100 + uint64(ind6)*10 + uint64(ind7), nil - } - i++ - ind9 := intDigits[d.buf[i]] - value = value*10000000 + uint64(ind2)*1000000 + uint64(ind3)*100000 + uint64(ind4)*10000 + uint64(ind5)*1000 + uint64(ind6)*100 + uint64(ind7)*10 + uint64(ind8) - d.head = i - if ind9 == invalidCharForNumber { - return value, nil - } - } - for { - buf := d.buf[d.head:d.tail] - for i, c := range buf { - ind = intDigits[c] - if ind == invalidCharForNumber { - d.head += i - return value, nil - } - if value > uint64SafeToMultiple10 { - value2 := (value 
<< 3) + (value << 1) + uint64(ind) - if value2 < value { - return 0, errOverflow - } - value = value2 - continue - } - value = (value << 3) + (value << 1) + uint64(ind) - } - switch err := d.read(); err { - case io.EOF: - return value, nil - case nil: - continue - default: - return 0, err - } - } -} diff --git a/generate.go b/generate.go new file mode 100644 index 0000000..6720078 --- /dev/null +++ b/generate.go @@ -0,0 +1,3 @@ +package jx + +//go:generate go run ./tools/mkint diff --git a/tools/mkencint/main.go b/tools/mkencint/main.go deleted file mode 100644 index 2bcdfee..0000000 --- a/tools/mkencint/main.go +++ /dev/null @@ -1,126 +0,0 @@ -// Command mkencint generates integer encoding functions. -package main - -import ( - "bytes" - _ "embed" - "flag" - "fmt" - "go/format" - "io" - "math" - "os" - "strconv" - "text/template" - "unicode" - "unicode/utf8" -) - -// IntType represents Go integer type. -type IntType struct { - Name string - Iterations int -} - -// Config is generation config. -type Config struct { - PackageName string - Types []IntType -} - -func times(num int) []struct{} { - return make([]struct{}, num) -} - -func title(s string) string { - r, size := utf8.DecodeRuneInString(s) - return string(unicode.ToUpper(r)) + s[size:] -} - -func add(a, b int) int { - return a + b -} - -func sub(a, b int) int { - return a - b -} - -//go:embed gen.tmpl -var rawTemplate string - -func computeIterations(max uint64) int { - // Compute maximum pow of 1000 plus remainder. 
- return len(strconv.FormatUint(max, 10))/3 + 1 -} - -func generate(w io.Writer, pkgName string) error { - buf := bytes.Buffer{} - - types := []IntType{ - { - Name: "int64", - Iterations: computeIterations(math.MaxUint64), - }, - { - Name: "int32", - Iterations: computeIterations(math.MaxUint32), - }, - { - Name: "int16", - Iterations: computeIterations(math.MaxUint16), - }, - } - - t := template.Must(template.New("gen").Funcs(template.FuncMap{ - "times": times, - "title": title, - "add": add, - "sub": sub, - }).Parse(rawTemplate)) - if err := t.ExecuteTemplate(&buf, "main", Config{ - PackageName: pkgName, - Types: types, - }); err != nil { - return fmt.Errorf("execute: %w", err) - } - - formatted, err := format.Source(buf.Bytes()) - if err != nil { - _, _ = os.Stderr.Write(buf.Bytes()) - return fmt.Errorf("format: %w", err) - } - - if _, err := w.Write(formatted); err != nil { - return fmt.Errorf("write: %w", err) - } - - return nil -} - -func run() error { - var ( - o = flag.String("output", "", "output file") - pkgName = flag.String("package", "jx", "package name") - ) - flag.Parse() - - var w io.Writer = os.Stdout - if path := *o; path != "" { - f, err := os.Create(path) // #nosec G304 - if err != nil { - return err - } - defer func() { - fmt.Println(f.Close()) - }() - w = f - } - - return generate(w, *pkgName) -} - -func main() { - if err := run(); err != nil { - panic(err) - } -} diff --git a/tools/mkint/decode.tmpl b/tools/mkint/decode.tmpl new file mode 100644 index 0000000..36b3aec --- /dev/null +++ b/tools/mkint/decode.tmpl @@ -0,0 +1,157 @@ +{{ define "main" }} + {{- /*gotype: github.com/go-faster/jx/tools/mkint.Config*/ -}} + // Code generated by mkint, DO NOT EDIT. 
+ + package {{ $.PackageName }} + + import ( + "io" + "math" + "strconv" + + "github.com/go-faster/errors" + ) + + + var ( + intDigits [256]int8 + errOverflow = strconv.ErrRange + ) + + const ( + uint8SafeToMultiple10 = uint8(0xff)/10 - 1 + uint16SafeToMultiple10 = uint16(0xffff)/10 - 1 + uint32SafeToMultiple10 = uint32(0xffffffff)/10 - 1 + uint64SafeToMultiple10 = uint64(0xffffffffffffffff)/10 - 1 + ) + + func init() { + for i := 0; i < len(intDigits); i++ { + intDigits[i] = invalidCharForNumber + } + for i := int8('0'); i <= int8('9'); i++ { + intDigits[i] = i - int8('0') + } + } + + {{ range $typ := $.Types }} + {{ template "decode_uint" $typ }} + {{ template "decode_int" $typ }} + {{- end }} + +{{ end }} + +{{ define "decode_uint" }} + {{- /*gotype: github.com/go-faster/jx/tools/mkint.IntType */ -}} + // U{{ title $.Name }} reads u{{ $.Name }}. + func (d *Decoder) U{{ title $.Name }}() (u{{ $.Name }}, error) { + c, err := d.more() + if err != nil { + return 0, err + } + return d.readU{{ title $.Name }}(c) + } + + func (d *Decoder) readU{{ title $.Name }}(c byte) (u{{ $.Name }}, error) { + ind := intDigits[c] + switch ind { + case 0: + // Check that next byte is not a digit. + c, err := d.peek() + if err == nil && intDigits[c] != invalidCharForNumber { + err := badToken(c, d.offset()) + return 0, errors.Wrap(err, "digit after leading zero") + } + return 0, nil // single zero + case invalidCharForNumber: + return 0, badToken(c, d.offset()-1) + } + value := u{{ $.Name }}(ind) + if d.tail-d.head > {{ $.DecoderIterations }} { + i := d.head + {{- range $i, $_ := times $.DecoderIterations }} + // Iteration {{ $i }}. 
+ ind{{ add $i 2 }} := intDigits[d.buf[i]] + if ind{{ add $i 2 }} == invalidCharForNumber { + d.head = i + value *= {{ pow10 $i }} + {{- range $r, $_ := times $i }} + value += u{{ $.Name }}(ind{{ add $r 2 }}) * {{ pow10 (sub (sub $i $r) 1) }} + {{- end }} + return value, nil + } + + {{- if eq $i (sub $.DecoderIterations 1) }} + d.head = i + value *= {{ pow10 $i }} + {{- range $r, $_ := times $i }} + value += u{{ $.Name }}(ind{{ add $r 2 }}) * {{ pow10 (sub (sub $i $r) 1) }} + {{- end }} + {{- else }} + i++ + {{- end }} + + {{- end }} + } + for { + buf := d.buf[d.head:d.tail] + for i, c := range buf { + ind = intDigits[c] + if ind == invalidCharForNumber { + d.head += i + return value, nil + } + if value > u{{ $.Name }}SafeToMultiple10 { + value2 := (value << 3) + (value << 1) + u{{ $.Name }}(ind) + if value2 < value { + return 0, errOverflow + } + value = value2 + continue + } + value = (value << 3) + (value << 1) + u{{ $.Name }}(ind) + } + switch err := d.read(); err { + case io.EOF: + return value, nil + case nil: + continue + default: + return 0, err + } + } + } +{{ end }} + +{{ define "decode_int" }} + {{- /*gotype: github.com/go-faster/jx/tools/mkint.IntType */ -}} + // {{ title $.Name }} reads {{ $.Name }}. 
+ func (d *Decoder) {{ title $.Name }}() ({{ $.Name }}, error) { + c, err := d.more() + if err != nil { + return 0, err + } + if c == '-' { + c, err := d.byte() + if err != nil { + return 0, err + } + val, err := d.readU{{ title $.Name }}(c) + if err != nil { + return 0, err + } + if val > math.Max{{ title $.Name }}+1 { + return 0, errors.Errorf("%d overflows", val) + } + return -{{ $.Name }}(val), nil + } + val, err := d.readU{{ title $.Name }}(c) + if err != nil { + return 0, err + } + if val > math.Max{{ title $.Name }} { + return 0, errors.Errorf("%d overflows", val) + } + return {{ $.Name }}(val), nil + } +{{ end }} diff --git a/tools/mkencint/gen.tmpl b/tools/mkint/encode.tmpl similarity index 50% rename from tools/mkencint/gen.tmpl rename to tools/mkint/encode.tmpl index 606bb53..9c44be1 100644 --- a/tools/mkencint/gen.tmpl +++ b/tools/mkint/encode.tmpl @@ -1,16 +1,16 @@ {{ define "main" }} -{{- /*gotype: github.com/go-faster/jx/tools/mkencint.Config*/ -}} -// Code generated by mkencint, DO NOT EDIT. + {{- /*gotype: github.com/go-faster/jx/tools/mkint.Config*/ -}} + // Code generated by mkint, DO NOT EDIT. 
-package {{ $.PackageName }} + package {{ $.PackageName }} -var digits []uint32 + var digits []uint32 -func init() { - digits = make([]uint32, 1000) - for i := uint32(0); i < 1000; i++ { - digits[i] = (((i / 100) + '0') << 16) + ((((i / 10) % 10) + '0') << 8) + i%10 + '0' - if i < 10 { + func init() { + digits = make([]uint32, 1000) + for i := uint32(0); i < 1000; i++ { + digits[i] = (((i / 100) + '0') << 16) + ((((i / 10) % 10) + '0') << 8) + i%10 + '0' + if i < 10 { digits[i] += 2 << 24 } else if i < 100 { digits[i] += 1 << 24 @@ -34,33 +34,34 @@ func writeBuf(buf []byte, v uint32) []byte { } {{ range $typ := $.Types }} -{{ template "encode_uint" $typ }} -{{ template "encode_int" $typ }} + {{ template "encode_uint" $typ }} + {{ template "encode_int" $typ }} {{- end }} + {{ end }} {{ define "encode_uint" }} -{{- /*gotype: github.com/go-faster/jx/tools/mkencint.IntType */ -}} + {{- /*gotype: github.com/go-faster/jx/tools/mkint.IntType */ -}} // U{{ title $.Name }} encodes u{{ $.Name }}. func (w *Writer) U{{ title $.Name }}(v u{{ $.Name }}) { q0 := v - {{- range $i, $_ := times $.Iterations }} - // Iteration {{ $i }}. - {{- if not (eq $i 0) }} - r{{$i}} := q{{add $i -1}} - q{{$i}}*1000 - {{- end }} + {{- range $i, $_ := times $.EncoderIterations }} + // Iteration {{ $i }}. 
+ {{- if not (eq $i 0) }} + r{{$i}} := q{{add $i -1}} - q{{$i}}*1000 + {{- end }} - {{- if eq $i (sub $.Iterations 1) }} - w.Buf = writeFirstBuf(w.Buf, digits[q{{$i}}]) - {{- range $r, $_ := times $i }} - w.Buf = writeBuf(w.Buf, digits[r{{sub $i $r}}]) - {{- end }} - {{- else }} - q{{add $i 1}} := q{{$i}} / 1000 - if q{{add $i 1}} == 0 { - w.Buf = writeFirstBuf(w.Buf, digits[q{{$i}}]) - {{- range $r, $_ := times $i }} - w.Buf = writeBuf(w.Buf, digits[r{{sub $i $r}}]) + {{- if eq $i (sub $.EncoderIterations 1) }} + w.Buf = writeFirstBuf(w.Buf, digits[q{{$i}}]) + {{- range $r, $_ := times $i }} + w.Buf = writeBuf(w.Buf, digits[r{{sub $i $r}}]) + {{- end }} + {{- else }} + q{{add $i 1}} := q{{$i}} / 1000 + if q{{add $i 1}} == 0 { + w.Buf = writeFirstBuf(w.Buf, digits[q{{$i}}]) + {{- range $r, $_ := times $i }} + w.Buf = writeBuf(w.Buf, digits[r{{sub $i $r}}]) {{- end }} return } @@ -76,7 +77,7 @@ func (e *Encoder) U{{ title $.Name }}(v u{{ $.Name }}) { {{ end }} {{ define "encode_int" }} -{{- /*gotype: github.com/go-faster/jx/tools/mkencint.IntType */ -}} + {{- /*gotype: github.com/go-faster/jx/tools/mkint.IntType */ -}} // {{ title $.Name }} encodes {{ $.Name }}. func (w *Writer) {{ title $.Name }}(v {{ $.Name }}) { var val u{{ $.Name }} diff --git a/tools/mkint/main.go b/tools/mkint/main.go new file mode 100644 index 0000000..2aaf768 --- /dev/null +++ b/tools/mkint/main.go @@ -0,0 +1,163 @@ +// Command mkint generates integer encoding/decoding functions. +package main + +import ( + "bytes" + _ "embed" + "flag" + "fmt" + "go/format" + "io" + "math" + "os" + "strconv" + "text/template" + "unicode" + "unicode/utf8" +) + +// IntType represents Go integer type. 
+type IntType struct { + Name string + EncoderIterations int // ceil(log1000 (max value)) + DecoderIterations int // ceil(log10 (max value)) +} + +func defineIntType(name string, max uint64) IntType { + formattedLen := len(strconv.FormatUint(max, 10)) + decoderIters := formattedLen + + const decoderItersLimit = 10 - 1 + if decoderIters > decoderItersLimit { + decoderIters = decoderItersLimit + } + return IntType{ + Name: name, + EncoderIterations: formattedLen/3 + 1, // Compute maximum pow of 1000 plus remainder. + DecoderIterations: decoderIters, // Compute maximum pow of 10 plus remainder. + } +} + +var intTypes = []IntType{ + defineIntType("int8", math.MaxUint8), + defineIntType("int16", math.MaxUint16), + defineIntType("int32", math.MaxUint32), + defineIntType("int64", math.MaxUint64), +} + +// Config is generation config. +type Config struct { + PackageName string + Types []IntType +} + +func times(num int) []struct{} { + return make([]struct{}, num) +} + +func title(s string) string { + r, size := utf8.DecodeRuneInString(s) + return string(unicode.ToUpper(r)) + s[size:] +} + +func add(a, b int) int { + return a + b +} + +func sub(a, b int) int { + return a - b +} + +func pow10(power int) (r int) { + if power <= 0 { + return 1 + } + r = 10 + for i := 1; i < power; i++ { + r *= 10 + } + return r +} + +func executeTemplate(w io.Writer, tmpl string, cfg Config) error { + var buf bytes.Buffer + + t := template.Must(template.New("gen").Funcs(template.FuncMap{ + "times": times, + "title": title, + "add": add, + "sub": sub, + "pow10": pow10, + }).Parse(tmpl)) + if err := t.ExecuteTemplate(&buf, "main", cfg); err != nil { + return fmt.Errorf("execute: %w", err) + } + + formatted, err := format.Source(buf.Bytes()) + if err != nil { + _, _ = os.Stderr.Write(buf.Bytes()) + return fmt.Errorf("format: %w", err) + } + + if _, err := w.Write(formatted); err != nil { + return fmt.Errorf("write: %w", err) + } + + return nil +} + +//go:embed encode.tmpl +var encodeTemplate 
string + +func generateEncode(w io.Writer, pkgName string) error { + return executeTemplate(w, encodeTemplate, Config{ + PackageName: pkgName, + Types: intTypes[1:], // Skip int8, use manual encoder. + }) +} + +//go:embed decode.tmpl +var decodeTemplate string + +func generateDecode(w io.Writer, pkgName string) error { + return executeTemplate(w, decodeTemplate, Config{ + PackageName: pkgName, + Types: intTypes, + }) +} + +func run() error { + var ( + pkgName = flag.String("package", "jx", "package name") + ) + flag.Parse() + + for _, file := range []struct { + name string + f func(io.Writer, string) error + }{ + {"w_int.gen.go", generateEncode}, + {"dec_int.gen.go", generateDecode}, + } { + if err := func() error { + f, err := os.Create(file.name) + if err != nil { + return err + } + defer func() { + _ = f.Close() + }() + + return file.f(f, *pkgName) + }(); err != nil { + return fmt.Errorf("generate %s: %w", file.name, err) + } + } + return nil +} + +func main() { + if err := run(); err != nil { + panic(err) + } +} diff --git a/w_int.go b/w_int.go index 965f47f..bff36c4 100644 --- a/w_int.go +++ b/w_int.go @@ -1,7 +1,5 @@ package jx -//go:generate go run ./tools/mkencint -output w_int.gen.go - // Int encodes int. 
func (w *Writer) Int(v int) { w.Int64(int64(v)) From 082f19b6fef19c158ac718f7b975c618a993dc0e Mon Sep 17 00:00:00 2001 From: tdakkota Date: Thu, 19 Jan 2023 13:08:15 +0300 Subject: [PATCH 07/10] chore: use tabs for indent in Go templates --- .editorconfig | 2 +- tools/mkint/decode.tmpl | 166 ++++++++++++++++++++-------------------- tools/mkint/encode.tmpl | 52 ++++++------- 3 files changed, 110 insertions(+), 110 deletions(-) diff --git a/.editorconfig b/.editorconfig index db2e5a0..16c4895 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ insert_final_newline = true trim_trailing_whitespace = true end_of_line = lf -[{*.go, go.mod}] +[{*.go, go.mod, *.tmpl}] indent_style = tab indent_size = 4 diff --git a/tools/mkint/decode.tmpl b/tools/mkint/decode.tmpl index 36b3aec..13d737d 100644 --- a/tools/mkint/decode.tmpl +++ b/tools/mkint/decode.tmpl @@ -1,84 +1,84 @@ {{ define "main" }} - {{- /*gotype: github.com/go-faster/jx/tools/mkint.Config*/ -}} - // Code generated by mkint, DO NOT EDIT. +{{- /*gotype: github.com/go-faster/jx/tools/mkint.Config*/ -}} +// Code generated by mkint, DO NOT EDIT. 
- package {{ $.PackageName }} +package {{ $.PackageName }} - import ( +import ( "io" "math" "strconv" "github.com/go-faster/errors" - ) +) - var ( +var ( intDigits [256]int8 errOverflow = strconv.ErrRange - ) +) - const ( +const ( uint8SafeToMultiple10 = uint8(0xff)/10 - 1 uint16SafeToMultiple10 = uint16(0xffff)/10 - 1 uint32SafeToMultiple10 = uint32(0xffffffff)/10 - 1 uint64SafeToMultiple10 = uint64(0xffffffffffffffff)/10 - 1 - ) +) - func init() { +func init() { for i := 0; i < len(intDigits); i++ { - intDigits[i] = invalidCharForNumber + intDigits[i] = invalidCharForNumber } for i := int8('0'); i <= int8('9'); i++ { - intDigits[i] = i - int8('0') - } + intDigits[i] = i - int8('0') } +} - {{ range $typ := $.Types }} - {{ template "decode_uint" $typ }} - {{ template "decode_int" $typ }} - {{- end }} +{{ range $typ := $.Types }} + {{ template "decode_uint" $typ }} + {{ template "decode_int" $typ }} +{{- end }} {{ end }} {{ define "decode_uint" }} - {{- /*gotype: github.com/go-faster/jx/tools/mkint.IntType */ -}} - // U{{ title $.Name }} reads u{{ $.Name }}. - func (d *Decoder) U{{ title $.Name }}() (u{{ $.Name }}, error) { +{{- /*gotype: github.com/go-faster/jx/tools/mkint.IntType */ -}} +// U{{ title $.Name }} reads u{{ $.Name }}. +func (d *Decoder) U{{ title $.Name }}() (u{{ $.Name }}, error) { c, err := d.more() if err != nil { - return 0, err + return 0, err } return d.readU{{ title $.Name }}(c) - } +} - func (d *Decoder) readU{{ title $.Name }}(c byte) (u{{ $.Name }}, error) { +func (d *Decoder) readU{{ title $.Name }}(c byte) (u{{ $.Name }}, error) { ind := intDigits[c] switch ind { case 0: - // Check that next byte is not a digit. - c, err := d.peek() - if err == nil && intDigits[c] != invalidCharForNumber { - err := badToken(c, d.offset()) - return 0, errors.Wrap(err, "digit after leading zero") - } - return 0, nil // single zero + // Check that next byte is not a digit. 
+ c, err := d.peek() + if err == nil && intDigits[c] != invalidCharForNumber { + err := badToken(c, d.offset()) + return 0, errors.Wrap(err, "digit after leading zero") + } + return 0, nil // single zero case invalidCharForNumber: - return 0, badToken(c, d.offset()-1) + return 0, badToken(c, d.offset()-1) } value := u{{ $.Name }}(ind) if d.tail-d.head > {{ $.DecoderIterations }} { - i := d.head + i := d.head {{- range $i, $_ := times $.DecoderIterations }} // Iteration {{ $i }}. ind{{ add $i 2 }} := intDigits[d.buf[i]] if ind{{ add $i 2 }} == invalidCharForNumber { - d.head = i - value *= {{ pow10 $i }} - {{- range $r, $_ := times $i }} - value += u{{ $.Name }}(ind{{ add $r 2 }}) * {{ pow10 (sub (sub $i $r) 1) }} - {{- end }} - return value, nil + d.head = i + value *= {{ pow10 $i }} + {{- range $r, $_ := times $i }} + value += u{{ $.Name }}(ind{{ add $r 2 }}) * {{ pow10 (sub (sub $i $r) 1) }} + {{- end }} + return value, nil } {{- if eq $i (sub $.DecoderIterations 1) }} @@ -94,64 +94,64 @@ {{- end }} } for { - buf := d.buf[d.head:d.tail] - for i, c := range buf { - ind = intDigits[c] - if ind == invalidCharForNumber { - d.head += i - return value, nil - } - if value > u{{ $.Name }}SafeToMultiple10 { - value2 := (value << 3) + (value << 1) + u{{ $.Name }}(ind) - if value2 < value { - return 0, errOverflow - } - value = value2 - continue - } - value = (value << 3) + (value << 1) + u{{ $.Name }}(ind) - } - switch err := d.read(); err { - case io.EOF: - return value, nil - case nil: - continue - default: - return 0, err - } - } + buf := d.buf[d.head:d.tail] + for i, c := range buf { + ind = intDigits[c] + if ind == invalidCharForNumber { + d.head += i + return value, nil + } + if value > u{{ $.Name }}SafeToMultiple10 { + value2 := (value << 3) + (value << 1) + u{{ $.Name }}(ind) + if value2 < value { + return 0, errOverflow + } + value = value2 + continue + } + value = (value << 3) + (value << 1) + u{{ $.Name }}(ind) + } + switch err := d.read(); err { + case io.EOF: 
+ return value, nil + case nil: + continue + default: + return 0, err + } } +} {{ end }} {{ define "decode_int" }} - {{- /*gotype: github.com/go-faster/jx/tools/mkint.IntType */ -}} - // {{ title $.Name }} reads {{ $.Name }}. - func (d *Decoder) {{ title $.Name }}() ({{ $.Name }}, error) { +{{- /*gotype: github.com/go-faster/jx/tools/mkint.IntType */ -}} +// {{ title $.Name }} reads {{ $.Name }}. +func (d *Decoder) {{ title $.Name }}() ({{ $.Name }}, error) { c, err := d.more() if err != nil { - return 0, err + return 0, err } if c == '-' { - c, err := d.byte() - if err != nil { - return 0, err - } - val, err := d.readU{{ title $.Name }}(c) - if err != nil { - return 0, err - } - if val > math.Max{{ title $.Name }}+1 { - return 0, errors.Errorf("%d overflows", val) - } - return -{{ $.Name }}(val), nil + c, err := d.byte() + if err != nil { + return 0, err + } + val, err := d.readU{{ title $.Name }}(c) + if err != nil { + return 0, err + } + if val > math.Max{{ title $.Name }}+1 { + return 0, errors.Errorf("%d overflows", val) + } + return -{{ $.Name }}(val), nil } val, err := d.readU{{ title $.Name }}(c) if err != nil { - return 0, err + return 0, err } if val > math.Max{{ title $.Name }} { - return 0, errors.Errorf("%d overflows", val) + return 0, errors.Errorf("%d overflows", val) } return {{ $.Name }}(val), nil - } +} {{ end }} diff --git a/tools/mkint/encode.tmpl b/tools/mkint/encode.tmpl index 9c44be1..edc5286 100644 --- a/tools/mkint/encode.tmpl +++ b/tools/mkint/encode.tmpl @@ -1,16 +1,16 @@ {{ define "main" }} - {{- /*gotype: github.com/go-faster/jx/tools/mkint.Config*/ -}} - // Code generated by mkint, DO NOT EDIT. +{{- /*gotype: github.com/go-faster/jx/tools/mkint.Config*/ -}} +// Code generated by mkint, DO NOT EDIT. 
- package {{ $.PackageName }} +package {{ $.PackageName }} - var digits []uint32 +var digits []uint32 - func init() { - digits = make([]uint32, 1000) +func init() { + digits = make([]uint32, 1000) for i := uint32(0); i < 1000; i++ { - digits[i] = (((i / 100) + '0') << 16) + ((((i / 10) % 10) + '0') << 8) + i%10 + '0' - if i < 10 { + digits[i] = (((i / 100) + '0') << 16) + ((((i / 10) % 10) + '0') << 8) + i%10 + '0' + if i < 10 { digits[i] += 2 << 24 } else if i < 100 { digits[i] += 1 << 24 @@ -41,27 +41,27 @@ func writeBuf(buf []byte, v uint32) []byte { {{ end }} {{ define "encode_uint" }} - {{- /*gotype: github.com/go-faster/jx/tools/mkint.IntType */ -}} +{{- /*gotype: github.com/go-faster/jx/tools/mkint.IntType */ -}} // U{{ title $.Name }} encodes u{{ $.Name }}. func (w *Writer) U{{ title $.Name }}(v u{{ $.Name }}) { q0 := v - {{- range $i, $_ := times $.EncoderIterations }} - // Iteration {{ $i }}. - {{- if not (eq $i 0) }} - r{{$i}} := q{{add $i -1}} - q{{$i}}*1000 - {{- end }} + {{- range $i, $_ := times $.EncoderIterations }} + // Iteration {{ $i }}. 
+ {{- if not (eq $i 0) }} + r{{$i}} := q{{add $i -1}} - q{{$i}}*1000 + {{- end }} - {{- if eq $i (sub $.EncoderIterations 1) }} - w.Buf = writeFirstBuf(w.Buf, digits[q{{$i}}]) - {{- range $r, $_ := times $i }} - w.Buf = writeBuf(w.Buf, digits[r{{sub $i $r}}]) - {{- end }} - {{- else }} - q{{add $i 1}} := q{{$i}} / 1000 - if q{{add $i 1}} == 0 { - w.Buf = writeFirstBuf(w.Buf, digits[q{{$i}}]) - {{- range $r, $_ := times $i }} - w.Buf = writeBuf(w.Buf, digits[r{{sub $i $r}}]) + {{- if eq $i (sub $.EncoderIterations 1) }} + w.Buf = writeFirstBuf(w.Buf, digits[q{{$i}}]) + {{- range $r, $_ := times $i }} + w.Buf = writeBuf(w.Buf, digits[r{{sub $i $r}}]) + {{- end }} + {{- else }} + q{{add $i 1}} := q{{$i}} / 1000 + if q{{add $i 1}} == 0 { + w.Buf = writeFirstBuf(w.Buf, digits[q{{$i}}]) + {{- range $r, $_ := times $i }} + w.Buf = writeBuf(w.Buf, digits[r{{sub $i $r}}]) {{- end }} return } @@ -77,7 +77,7 @@ func (e *Encoder) U{{ title $.Name }}(v u{{ $.Name }}) { {{ end }} {{ define "encode_int" }} - {{- /*gotype: github.com/go-faster/jx/tools/mkint.IntType */ -}} +{{- /*gotype: github.com/go-faster/jx/tools/mkint.IntType */ -}} // {{ title $.Name }} encodes {{ $.Name }}. 
func (w *Writer) {{ title $.Name }}(v {{ $.Name }}) { var val u{{ $.Name }} From d92bc3a13ecc936f4919f53716ba8628c69aa551 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Fri, 20 Jan 2023 04:52:59 +0300 Subject: [PATCH 08/10] fix: use `errOverflow` everywhere --- tools/mkint/decode.tmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/mkint/decode.tmpl b/tools/mkint/decode.tmpl index 13d737d..a01d0fa 100644 --- a/tools/mkint/decode.tmpl +++ b/tools/mkint/decode.tmpl @@ -141,7 +141,7 @@ func (d *Decoder) {{ title $.Name }}() ({{ $.Name }}, error) { return 0, err } if val > math.Max{{ title $.Name }}+1 { - return 0, errors.Errorf("%d overflows", val) + return 0, errOverflow } return -{{ $.Name }}(val), nil } @@ -150,7 +150,7 @@ func (d *Decoder) {{ title $.Name }}() ({{ $.Name }}, error) { return 0, err } if val > math.Max{{ title $.Name }} { - return 0, errors.Errorf("%d overflows", val) + return 0, errOverflow } return {{ $.Name }}(val), nil } From d9997aa7a6f603611b20236580f5f6814629a231 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Fri, 20 Jan 2023 06:50:53 +0300 Subject: [PATCH 09/10] fix: always use safe float slow path Using noescape hack may lead to use-after-free error. Error returned by `strconv.ParseFloat` may refer to allocated slice. 
--- dec_float.go | 20 ++++++++++++++++++++ dec_parse_float_safe.go | 28 ---------------------------- dec_parse_float_unsafe.go | 34 ---------------------------------- runtime.go | 20 -------------------- 4 files changed, 20 insertions(+), 82 deletions(-) delete mode 100644 dec_parse_float_safe.go delete mode 100644 dec_parse_float_unsafe.go delete mode 100644 runtime.go diff --git a/dec_float.go b/dec_float.go index 68c7061..9736ea4 100644 --- a/dec_float.go +++ b/dec_float.go @@ -4,6 +4,7 @@ import ( "bytes" "io" "math/big" + "strconv" "github.com/go-faster/errors" ) @@ -355,3 +356,22 @@ func validateFloat(str []byte) error { } return nil } + +func (d *Decoder) floatSlow(size int) (float64, error) { + var buf [32]byte + + str, err := d.numberAppend(buf[:0]) + if err != nil { + return 0, errors.Wrap(err, "number") + } + if err := validateFloat(str); err != nil { + return 0, errors.Wrap(err, "invalid") + } + + val, err := strconv.ParseFloat(string(str), size) + if err != nil { + return 0, err + } + + return val, nil +} diff --git a/dec_parse_float_safe.go b/dec_parse_float_safe.go deleted file mode 100644 index b85fa74..0000000 --- a/dec_parse_float_safe.go +++ /dev/null @@ -1,28 +0,0 @@ -//go:build appengine || purego - -package jx - -import ( - "strconv" - - "github.com/go-faster/errors" -) - -func (d *Decoder) floatSlow(size int) (float64, error) { - var buf [32]byte - - str, err := d.numberAppend(buf[:0]) - if err != nil { - return 0, errors.Wrap(err, "number") - } - if err := validateFloat(str); err != nil { - return 0, errors.Wrap(err, "invalid") - } - - val, err := strconv.ParseFloat(string(str), size) - if err != nil { - return 0, err - } - - return val, nil -} diff --git a/dec_parse_float_unsafe.go b/dec_parse_float_unsafe.go deleted file mode 100644 index 6965e81..0000000 --- a/dec_parse_float_unsafe.go +++ /dev/null @@ -1,34 +0,0 @@ -//go:build !appengine && !purego - -package jx - -import ( - "strconv" - "unsafe" - - "github.com/go-faster/errors" -) - 
-func (d *Decoder) floatSlow(size int) (float64, error) { - var buf [32]byte - - str, err := d.numberAppend(buf[:0]) - if err != nil { - return 0, errors.Wrap(err, "number") - } - if err := validateFloat(str); err != nil { - return 0, errors.Wrap(err, "invalid") - } - - slice := *(*sliceType)(unsafe.Pointer(&str)) // #nosec G103 - s := strType{ - Ptr: noescape(slice.Ptr), - Len: slice.Len, - } - val, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&s)), size) // #nosec G103 - if err != nil { - return 0, err - } - - return val, nil -} diff --git a/runtime.go b/runtime.go deleted file mode 100644 index dc2a2f9..0000000 --- a/runtime.go +++ /dev/null @@ -1,20 +0,0 @@ -//go:build !appengine && !purego - -package jx - -import "unsafe" - -type sliceType struct { - Ptr unsafe.Pointer - Len uintptr - Cap uintptr -} - -type strType struct { - Ptr unsafe.Pointer - Len uintptr -} - -//go:noescape -//go:linkname noescape runtime.noescape -func noescape(unsafe.Pointer) unsafe.Pointer From cc57d4ddc7263b5feea20b951ddee67589df8c79 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Thu, 19 Jan 2023 04:33:09 +0300 Subject: [PATCH 10/10] chore: commit generated files --- dec_int.gen.go | 695 +++++++++++++++++++++++++++++++++++++++++++++++++ w_int.gen.go | 172 ++++++------ 2 files changed, 781 insertions(+), 86 deletions(-) create mode 100644 dec_int.gen.go diff --git a/dec_int.gen.go b/dec_int.gen.go new file mode 100644 index 0000000..fdf41c9 --- /dev/null +++ b/dec_int.gen.go @@ -0,0 +1,695 @@ +// Code generated by mkint, DO NOT EDIT. 
+ +package jx + +import ( + "io" + "math" + "strconv" + + "github.com/go-faster/errors" +) + +var ( + intDigits [256]int8 + errOverflow = strconv.ErrRange +) + +const ( + uint8SafeToMultiple10 = uint8(0xff)/10 - 1 + uint16SafeToMultiple10 = uint16(0xffff)/10 - 1 + uint32SafeToMultiple10 = uint32(0xffffffff)/10 - 1 + uint64SafeToMultiple10 = uint64(0xffffffffffffffff)/10 - 1 +) + +func init() { + for i := 0; i < len(intDigits); i++ { + intDigits[i] = invalidCharForNumber + } + for i := int8('0'); i <= int8('9'); i++ { + intDigits[i] = i - int8('0') + } +} + +// UInt8 reads uint8. +func (d *Decoder) UInt8() (uint8, error) { + c, err := d.more() + if err != nil { + return 0, err + } + return d.readUInt8(c) +} + +func (d *Decoder) readUInt8(c byte) (uint8, error) { + ind := intDigits[c] + switch ind { + case 0: + // Check that next byte is not a digit. + c, err := d.peek() + if err == nil && intDigits[c] != invalidCharForNumber { + err := badToken(c, d.offset()) + return 0, errors.Wrap(err, "digit after leading zero") + } + return 0, nil // single zero + case invalidCharForNumber: + return 0, badToken(c, d.offset()-1) + } + value := uint8(ind) + if d.tail-d.head > 3 { + i := d.head + // Iteration 0. + ind2 := intDigits[d.buf[i]] + if ind2 == invalidCharForNumber { + d.head = i + value *= 1 + return value, nil + } + i++ + // Iteration 1. + ind3 := intDigits[d.buf[i]] + if ind3 == invalidCharForNumber { + d.head = i + value *= 10 + value += uint8(ind2) * 1 + return value, nil + } + i++ + // Iteration 2. 
+ ind4 := intDigits[d.buf[i]] + if ind4 == invalidCharForNumber { + d.head = i + value *= 100 + value += uint8(ind2) * 10 + value += uint8(ind3) * 1 + return value, nil + } + d.head = i + value *= 100 + value += uint8(ind2) * 10 + value += uint8(ind3) * 1 + } + for { + buf := d.buf[d.head:d.tail] + for i, c := range buf { + ind = intDigits[c] + if ind == invalidCharForNumber { + d.head += i + return value, nil + } + if value > uint8SafeToMultiple10 { + value2 := (value << 3) + (value << 1) + uint8(ind) + if value2 < value { + return 0, errOverflow + } + value = value2 + continue + } + value = (value << 3) + (value << 1) + uint8(ind) + } + switch err := d.read(); err { + case io.EOF: + return value, nil + case nil: + continue + default: + return 0, err + } + } +} + +// Int8 reads int8. +func (d *Decoder) Int8() (int8, error) { + c, err := d.more() + if err != nil { + return 0, err + } + if c == '-' { + c, err := d.byte() + if err != nil { + return 0, err + } + val, err := d.readUInt8(c) + if err != nil { + return 0, err + } + if val > math.MaxInt8+1 { + return 0, errOverflow + } + return -int8(val), nil + } + val, err := d.readUInt8(c) + if err != nil { + return 0, err + } + if val > math.MaxInt8 { + return 0, errOverflow + } + return int8(val), nil +} + +// UInt16 reads uint16. +func (d *Decoder) UInt16() (uint16, error) { + c, err := d.more() + if err != nil { + return 0, err + } + return d.readUInt16(c) +} + +func (d *Decoder) readUInt16(c byte) (uint16, error) { + ind := intDigits[c] + switch ind { + case 0: + // Check that next byte is not a digit. + c, err := d.peek() + if err == nil && intDigits[c] != invalidCharForNumber { + err := badToken(c, d.offset()) + return 0, errors.Wrap(err, "digit after leading zero") + } + return 0, nil // single zero + case invalidCharForNumber: + return 0, badToken(c, d.offset()-1) + } + value := uint16(ind) + if d.tail-d.head > 5 { + i := d.head + // Iteration 0. 
+ ind2 := intDigits[d.buf[i]] + if ind2 == invalidCharForNumber { + d.head = i + value *= 1 + return value, nil + } + i++ + // Iteration 1. + ind3 := intDigits[d.buf[i]] + if ind3 == invalidCharForNumber { + d.head = i + value *= 10 + value += uint16(ind2) * 1 + return value, nil + } + i++ + // Iteration 2. + ind4 := intDigits[d.buf[i]] + if ind4 == invalidCharForNumber { + d.head = i + value *= 100 + value += uint16(ind2) * 10 + value += uint16(ind3) * 1 + return value, nil + } + i++ + // Iteration 3. + ind5 := intDigits[d.buf[i]] + if ind5 == invalidCharForNumber { + d.head = i + value *= 1000 + value += uint16(ind2) * 100 + value += uint16(ind3) * 10 + value += uint16(ind4) * 1 + return value, nil + } + i++ + // Iteration 4. + ind6 := intDigits[d.buf[i]] + if ind6 == invalidCharForNumber { + d.head = i + value *= 10000 + value += uint16(ind2) * 1000 + value += uint16(ind3) * 100 + value += uint16(ind4) * 10 + value += uint16(ind5) * 1 + return value, nil + } + d.head = i + value *= 10000 + value += uint16(ind2) * 1000 + value += uint16(ind3) * 100 + value += uint16(ind4) * 10 + value += uint16(ind5) * 1 + } + for { + buf := d.buf[d.head:d.tail] + for i, c := range buf { + ind = intDigits[c] + if ind == invalidCharForNumber { + d.head += i + return value, nil + } + if value > uint16SafeToMultiple10 { + value2 := (value << 3) + (value << 1) + uint16(ind) + if value2 < value { + return 0, errOverflow + } + value = value2 + continue + } + value = (value << 3) + (value << 1) + uint16(ind) + } + switch err := d.read(); err { + case io.EOF: + return value, nil + case nil: + continue + default: + return 0, err + } + } +} + +// Int16 reads int16. 
+func (d *Decoder) Int16() (int16, error) { + c, err := d.more() + if err != nil { + return 0, err + } + if c == '-' { + c, err := d.byte() + if err != nil { + return 0, err + } + val, err := d.readUInt16(c) + if err != nil { + return 0, err + } + if val > math.MaxInt16+1 { + return 0, errOverflow + } + return -int16(val), nil + } + val, err := d.readUInt16(c) + if err != nil { + return 0, err + } + if val > math.MaxInt16 { + return 0, errOverflow + } + return int16(val), nil +} + +// UInt32 reads uint32. +func (d *Decoder) UInt32() (uint32, error) { + c, err := d.more() + if err != nil { + return 0, err + } + return d.readUInt32(c) +} + +func (d *Decoder) readUInt32(c byte) (uint32, error) { + ind := intDigits[c] + switch ind { + case 0: + // Check that next byte is not a digit. + c, err := d.peek() + if err == nil && intDigits[c] != invalidCharForNumber { + err := badToken(c, d.offset()) + return 0, errors.Wrap(err, "digit after leading zero") + } + return 0, nil // single zero + case invalidCharForNumber: + return 0, badToken(c, d.offset()-1) + } + value := uint32(ind) + if d.tail-d.head > 9 { + i := d.head + // Iteration 0. + ind2 := intDigits[d.buf[i]] + if ind2 == invalidCharForNumber { + d.head = i + value *= 1 + return value, nil + } + i++ + // Iteration 1. + ind3 := intDigits[d.buf[i]] + if ind3 == invalidCharForNumber { + d.head = i + value *= 10 + value += uint32(ind2) * 1 + return value, nil + } + i++ + // Iteration 2. + ind4 := intDigits[d.buf[i]] + if ind4 == invalidCharForNumber { + d.head = i + value *= 100 + value += uint32(ind2) * 10 + value += uint32(ind3) * 1 + return value, nil + } + i++ + // Iteration 3. + ind5 := intDigits[d.buf[i]] + if ind5 == invalidCharForNumber { + d.head = i + value *= 1000 + value += uint32(ind2) * 100 + value += uint32(ind3) * 10 + value += uint32(ind4) * 1 + return value, nil + } + i++ + // Iteration 4. 
+ ind6 := intDigits[d.buf[i]] + if ind6 == invalidCharForNumber { + d.head = i + value *= 10000 + value += uint32(ind2) * 1000 + value += uint32(ind3) * 100 + value += uint32(ind4) * 10 + value += uint32(ind5) * 1 + return value, nil + } + i++ + // Iteration 5. + ind7 := intDigits[d.buf[i]] + if ind7 == invalidCharForNumber { + d.head = i + value *= 100000 + value += uint32(ind2) * 10000 + value += uint32(ind3) * 1000 + value += uint32(ind4) * 100 + value += uint32(ind5) * 10 + value += uint32(ind6) * 1 + return value, nil + } + i++ + // Iteration 6. + ind8 := intDigits[d.buf[i]] + if ind8 == invalidCharForNumber { + d.head = i + value *= 1000000 + value += uint32(ind2) * 100000 + value += uint32(ind3) * 10000 + value += uint32(ind4) * 1000 + value += uint32(ind5) * 100 + value += uint32(ind6) * 10 + value += uint32(ind7) * 1 + return value, nil + } + i++ + // Iteration 7. + ind9 := intDigits[d.buf[i]] + if ind9 == invalidCharForNumber { + d.head = i + value *= 10000000 + value += uint32(ind2) * 1000000 + value += uint32(ind3) * 100000 + value += uint32(ind4) * 10000 + value += uint32(ind5) * 1000 + value += uint32(ind6) * 100 + value += uint32(ind7) * 10 + value += uint32(ind8) * 1 + return value, nil + } + i++ + // Iteration 8. 
+ ind10 := intDigits[d.buf[i]] + if ind10 == invalidCharForNumber { + d.head = i + value *= 100000000 + value += uint32(ind2) * 10000000 + value += uint32(ind3) * 1000000 + value += uint32(ind4) * 100000 + value += uint32(ind5) * 10000 + value += uint32(ind6) * 1000 + value += uint32(ind7) * 100 + value += uint32(ind8) * 10 + value += uint32(ind9) * 1 + return value, nil + } + d.head = i + value *= 100000000 + value += uint32(ind2) * 10000000 + value += uint32(ind3) * 1000000 + value += uint32(ind4) * 100000 + value += uint32(ind5) * 10000 + value += uint32(ind6) * 1000 + value += uint32(ind7) * 100 + value += uint32(ind8) * 10 + value += uint32(ind9) * 1 + } + for { + buf := d.buf[d.head:d.tail] + for i, c := range buf { + ind = intDigits[c] + if ind == invalidCharForNumber { + d.head += i + return value, nil + } + if value > uint32SafeToMultiple10 { + value2 := (value << 3) + (value << 1) + uint32(ind) + if value2 < value { + return 0, errOverflow + } + value = value2 + continue + } + value = (value << 3) + (value << 1) + uint32(ind) + } + switch err := d.read(); err { + case io.EOF: + return value, nil + case nil: + continue + default: + return 0, err + } + } +} + +// Int32 reads int32. +func (d *Decoder) Int32() (int32, error) { + c, err := d.more() + if err != nil { + return 0, err + } + if c == '-' { + c, err := d.byte() + if err != nil { + return 0, err + } + val, err := d.readUInt32(c) + if err != nil { + return 0, err + } + if val > math.MaxInt32+1 { + return 0, errOverflow + } + return -int32(val), nil + } + val, err := d.readUInt32(c) + if err != nil { + return 0, err + } + if val > math.MaxInt32 { + return 0, errOverflow + } + return int32(val), nil +} + +// UInt64 reads uint64. +func (d *Decoder) UInt64() (uint64, error) { + c, err := d.more() + if err != nil { + return 0, err + } + return d.readUInt64(c) +} + +func (d *Decoder) readUInt64(c byte) (uint64, error) { + ind := intDigits[c] + switch ind { + case 0: + // Check that next byte is not a digit. 
+ c, err := d.peek() + if err == nil && intDigits[c] != invalidCharForNumber { + err := badToken(c, d.offset()) + return 0, errors.Wrap(err, "digit after leading zero") + } + return 0, nil // single zero + case invalidCharForNumber: + return 0, badToken(c, d.offset()-1) + } + value := uint64(ind) + if d.tail-d.head > 9 { + i := d.head + // Iteration 0. + ind2 := intDigits[d.buf[i]] + if ind2 == invalidCharForNumber { + d.head = i + value *= 1 + return value, nil + } + i++ + // Iteration 1. + ind3 := intDigits[d.buf[i]] + if ind3 == invalidCharForNumber { + d.head = i + value *= 10 + value += uint64(ind2) * 1 + return value, nil + } + i++ + // Iteration 2. + ind4 := intDigits[d.buf[i]] + if ind4 == invalidCharForNumber { + d.head = i + value *= 100 + value += uint64(ind2) * 10 + value += uint64(ind3) * 1 + return value, nil + } + i++ + // Iteration 3. + ind5 := intDigits[d.buf[i]] + if ind5 == invalidCharForNumber { + d.head = i + value *= 1000 + value += uint64(ind2) * 100 + value += uint64(ind3) * 10 + value += uint64(ind4) * 1 + return value, nil + } + i++ + // Iteration 4. + ind6 := intDigits[d.buf[i]] + if ind6 == invalidCharForNumber { + d.head = i + value *= 10000 + value += uint64(ind2) * 1000 + value += uint64(ind3) * 100 + value += uint64(ind4) * 10 + value += uint64(ind5) * 1 + return value, nil + } + i++ + // Iteration 5. + ind7 := intDigits[d.buf[i]] + if ind7 == invalidCharForNumber { + d.head = i + value *= 100000 + value += uint64(ind2) * 10000 + value += uint64(ind3) * 1000 + value += uint64(ind4) * 100 + value += uint64(ind5) * 10 + value += uint64(ind6) * 1 + return value, nil + } + i++ + // Iteration 6. + ind8 := intDigits[d.buf[i]] + if ind8 == invalidCharForNumber { + d.head = i + value *= 1000000 + value += uint64(ind2) * 100000 + value += uint64(ind3) * 10000 + value += uint64(ind4) * 1000 + value += uint64(ind5) * 100 + value += uint64(ind6) * 10 + value += uint64(ind7) * 1 + return value, nil + } + i++ + // Iteration 7. 
+ ind9 := intDigits[d.buf[i]] + if ind9 == invalidCharForNumber { + d.head = i + value *= 10000000 + value += uint64(ind2) * 1000000 + value += uint64(ind3) * 100000 + value += uint64(ind4) * 10000 + value += uint64(ind5) * 1000 + value += uint64(ind6) * 100 + value += uint64(ind7) * 10 + value += uint64(ind8) * 1 + return value, nil + } + i++ + // Iteration 8. + ind10 := intDigits[d.buf[i]] + if ind10 == invalidCharForNumber { + d.head = i + value *= 100000000 + value += uint64(ind2) * 10000000 + value += uint64(ind3) * 1000000 + value += uint64(ind4) * 100000 + value += uint64(ind5) * 10000 + value += uint64(ind6) * 1000 + value += uint64(ind7) * 100 + value += uint64(ind8) * 10 + value += uint64(ind9) * 1 + return value, nil + } + d.head = i + value *= 100000000 + value += uint64(ind2) * 10000000 + value += uint64(ind3) * 1000000 + value += uint64(ind4) * 100000 + value += uint64(ind5) * 10000 + value += uint64(ind6) * 1000 + value += uint64(ind7) * 100 + value += uint64(ind8) * 10 + value += uint64(ind9) * 1 + } + for { + buf := d.buf[d.head:d.tail] + for i, c := range buf { + ind = intDigits[c] + if ind == invalidCharForNumber { + d.head += i + return value, nil + } + if value > uint64SafeToMultiple10 { + value2 := (value << 3) + (value << 1) + uint64(ind) + if value2 < value { + return 0, errOverflow + } + value = value2 + continue + } + value = (value << 3) + (value << 1) + uint64(ind) + } + switch err := d.read(); err { + case io.EOF: + return value, nil + case nil: + continue + default: + return 0, err + } + } +} + +// Int64 reads int64. 
+func (d *Decoder) Int64() (int64, error) { + c, err := d.more() + if err != nil { + return 0, err + } + if c == '-' { + c, err := d.byte() + if err != nil { + return 0, err + } + val, err := d.readUInt64(c) + if err != nil { + return 0, err + } + if val > math.MaxInt64+1 { + return 0, errOverflow + } + return -int64(val), nil + } + val, err := d.readUInt64(c) + if err != nil { + return 0, err + } + if val > math.MaxInt64 { + return 0, errOverflow + } + return int64(val), nil +} diff --git a/w_int.gen.go b/w_int.gen.go index 09c790f..412777b 100644 --- a/w_int.gen.go +++ b/w_int.gen.go @@ -1,4 +1,4 @@ -// Code generated by mkencint, DO NOT EDIT. +// Code generated by mkint, DO NOT EDIT. package jx @@ -31,8 +31,8 @@ func writeBuf(buf []byte, v uint32) []byte { return append(buf, byte(v>>16), byte(v>>8), byte(v)) } -// UInt64 encodes uint64. -func (w *Writer) UInt64(v uint64) { +// UInt16 encodes uint16. +func (w *Writer) UInt16(v uint16) { q0 := v // Iteration 0. q1 := q0 / 1000 @@ -42,87 +42,32 @@ func (w *Writer) UInt64(v uint64) { } // Iteration 1. r1 := q0 - q1*1000 - q2 := q1 / 1000 - if q2 == 0 { - w.Buf = writeFirstBuf(w.Buf, digits[q1]) - w.Buf = writeBuf(w.Buf, digits[r1]) - return - } - // Iteration 2. - r2 := q1 - q2*1000 - q3 := q2 / 1000 - if q3 == 0 { - w.Buf = writeFirstBuf(w.Buf, digits[q2]) - w.Buf = writeBuf(w.Buf, digits[r2]) - w.Buf = writeBuf(w.Buf, digits[r1]) - return - } - // Iteration 3. - r3 := q2 - q3*1000 - q4 := q3 / 1000 - if q4 == 0 { - w.Buf = writeFirstBuf(w.Buf, digits[q3]) - w.Buf = writeBuf(w.Buf, digits[r3]) - w.Buf = writeBuf(w.Buf, digits[r2]) - w.Buf = writeBuf(w.Buf, digits[r1]) - return - } - // Iteration 4. - r4 := q3 - q4*1000 - q5 := q4 / 1000 - if q5 == 0 { - w.Buf = writeFirstBuf(w.Buf, digits[q4]) - w.Buf = writeBuf(w.Buf, digits[r4]) - w.Buf = writeBuf(w.Buf, digits[r3]) - w.Buf = writeBuf(w.Buf, digits[r2]) - w.Buf = writeBuf(w.Buf, digits[r1]) - return - } - // Iteration 5. 
- r5 := q4 - q5*1000 - q6 := q5 / 1000 - if q6 == 0 { - w.Buf = writeFirstBuf(w.Buf, digits[q5]) - w.Buf = writeBuf(w.Buf, digits[r5]) - w.Buf = writeBuf(w.Buf, digits[r4]) - w.Buf = writeBuf(w.Buf, digits[r3]) - w.Buf = writeBuf(w.Buf, digits[r2]) - w.Buf = writeBuf(w.Buf, digits[r1]) - return - } - // Iteration 6. - r6 := q5 - q6*1000 - w.Buf = writeFirstBuf(w.Buf, digits[q6]) - w.Buf = writeBuf(w.Buf, digits[r6]) - w.Buf = writeBuf(w.Buf, digits[r5]) - w.Buf = writeBuf(w.Buf, digits[r4]) - w.Buf = writeBuf(w.Buf, digits[r3]) - w.Buf = writeBuf(w.Buf, digits[r2]) + w.Buf = writeFirstBuf(w.Buf, digits[q1]) w.Buf = writeBuf(w.Buf, digits[r1]) } -// UInt64 encodes uint64. -func (e *Encoder) UInt64(v uint64) { +// UInt16 encodes uint16. +func (e *Encoder) UInt16(v uint16) { e.comma() - e.w.UInt64(v) + e.w.UInt16(v) } -// Int64 encodes int64. -func (w *Writer) Int64(v int64) { - var val uint64 +// Int16 encodes int16. +func (w *Writer) Int16(v int16) { + var val uint16 if v < 0 { - val = uint64(-v) + val = uint16(-v) w.Buf = append(w.Buf, '-') } else { - val = uint64(v) + val = uint16(v) } - w.UInt64(val) + w.UInt16(val) } -// Int64 encodes int64. -func (e *Encoder) Int64(v int64) { +// Int16 encodes int16. +func (e *Encoder) Int16(v int16) { e.comma() - e.w.Int64(v) + e.w.Int16(v) } // UInt32 encodes uint32. @@ -183,8 +128,8 @@ func (e *Encoder) Int32(v int32) { e.w.Int32(v) } -// UInt16 encodes uint16. -func (w *Writer) UInt16(v uint16) { +// UInt64 encodes uint64. +func (w *Writer) UInt64(v uint64) { q0 := v // Iteration 0. q1 := q0 / 1000 @@ -194,30 +139,85 @@ func (w *Writer) UInt16(v uint16) { } // Iteration 1. r1 := q0 - q1*1000 - w.Buf = writeFirstBuf(w.Buf, digits[q1]) + q2 := q1 / 1000 + if q2 == 0 { + w.Buf = writeFirstBuf(w.Buf, digits[q1]) + w.Buf = writeBuf(w.Buf, digits[r1]) + return + } + // Iteration 2. 
+ r2 := q1 - q2*1000 + q3 := q2 / 1000 + if q3 == 0 { + w.Buf = writeFirstBuf(w.Buf, digits[q2]) + w.Buf = writeBuf(w.Buf, digits[r2]) + w.Buf = writeBuf(w.Buf, digits[r1]) + return + } + // Iteration 3. + r3 := q2 - q3*1000 + q4 := q3 / 1000 + if q4 == 0 { + w.Buf = writeFirstBuf(w.Buf, digits[q3]) + w.Buf = writeBuf(w.Buf, digits[r3]) + w.Buf = writeBuf(w.Buf, digits[r2]) + w.Buf = writeBuf(w.Buf, digits[r1]) + return + } + // Iteration 4. + r4 := q3 - q4*1000 + q5 := q4 / 1000 + if q5 == 0 { + w.Buf = writeFirstBuf(w.Buf, digits[q4]) + w.Buf = writeBuf(w.Buf, digits[r4]) + w.Buf = writeBuf(w.Buf, digits[r3]) + w.Buf = writeBuf(w.Buf, digits[r2]) + w.Buf = writeBuf(w.Buf, digits[r1]) + return + } + // Iteration 5. + r5 := q4 - q5*1000 + q6 := q5 / 1000 + if q6 == 0 { + w.Buf = writeFirstBuf(w.Buf, digits[q5]) + w.Buf = writeBuf(w.Buf, digits[r5]) + w.Buf = writeBuf(w.Buf, digits[r4]) + w.Buf = writeBuf(w.Buf, digits[r3]) + w.Buf = writeBuf(w.Buf, digits[r2]) + w.Buf = writeBuf(w.Buf, digits[r1]) + return + } + // Iteration 6. + r6 := q5 - q6*1000 + w.Buf = writeFirstBuf(w.Buf, digits[q6]) + w.Buf = writeBuf(w.Buf, digits[r6]) + w.Buf = writeBuf(w.Buf, digits[r5]) + w.Buf = writeBuf(w.Buf, digits[r4]) + w.Buf = writeBuf(w.Buf, digits[r3]) + w.Buf = writeBuf(w.Buf, digits[r2]) w.Buf = writeBuf(w.Buf, digits[r1]) } -// UInt16 encodes uint16. -func (e *Encoder) UInt16(v uint16) { +// UInt64 encodes uint64. +func (e *Encoder) UInt64(v uint64) { e.comma() - e.w.UInt16(v) + e.w.UInt64(v) } -// Int16 encodes int16. -func (w *Writer) Int16(v int16) { - var val uint16 +// Int64 encodes int64. +func (w *Writer) Int64(v int64) { + var val uint64 if v < 0 { - val = uint16(-v) + val = uint64(-v) w.Buf = append(w.Buf, '-') } else { - val = uint16(v) + val = uint64(v) } - w.UInt16(val) + w.UInt64(val) } -// Int16 encodes int16. -func (e *Encoder) Int16(v int16) { +// Int64 encodes int64. +func (e *Encoder) Int64(v int64) { e.comma() - e.w.Int16(v) + e.w.Int64(v) }