Skip to content

Commit

Permalink
update decoder api
Browse files Browse the repository at this point in the history
  • Loading branch information
a8m committed Sep 26, 2016
1 parent 81f98b3 commit f489e3f
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 44 deletions.
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,17 @@ The current version is `1.0.0-alpha.1`, and I'm waiting to hear from you
if there are any issues or bug reports, to make it stable.
(comment: there is a test file named `decode_test` that contains a [test case](https://github.com/a8m/djson/blob/master/decode_test.go#L104) that
compares the results to `encoding/json` - feel free to add more values if you find they are important)
I'm also plaining to add the `DecodeStream(io.ReaderCloser)` method, to support stream decoding
I'm also planning to add the `DecodeStream(io.ReadCloser)` method (or `NewDecoder(io.ReadCloser)`), to support stream decoding
without breaking performance.



### Benchmark
There are 3 benchmark types. small, medium and large payloads.
All 3 are taken from the `jsonparser` project, and they try to simulate real-life usage.
The results of each test are shown in a metrics table below. Lower is better.
__Time/op__ is in nanoseconds, __B/op__ is how many bytes were allocated
per op and __allocs/op__ is the total number of memory allocations.
Benchmark results that are better than the standard `encoding/json` are marked in bold text.
Benchmarks run on AWS EC2 instance(c4.xlarge). see: [screenshots](link
to screenshot in assets)

174 changes: 135 additions & 39 deletions decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,42 +5,126 @@ import (
"unicode"
)

// decoder is the object that holds the state of the scaning
type decoder struct {
data []byte
sdata string
pos int
end int
// Decoder holds the state of a single decoding pass over a JSON input.
type Decoder struct {
	// pos is the index into data of the byte currently being examined and
	// end is the length of data (one past the last readable index).
	pos, end int
	// data is the raw JSON-encoded input.
	data []byte
	// sdata is a string copy of data; it is filled in by AllocString.
	sdata string
	// usestring reports whether decoded strings and numbers should be
	// sliced out of sdata instead of being converted from data each time.
	usestring bool
}

func newDecoder(data []byte) *decoder {
return &decoder{
// NewDecoder creates new Decoder from the JSON-encoded data
func NewDecoder(data []byte) *Decoder {
return &Decoder{
data: data,
// Add a string version of the data. it is good because we do one allocation
// operation for string conversion(from bytes to string) and then
// use "slicing" to create strings in the "decoder.string" method.
// However, string is a read-only slice, and since the slice references the
// original array, as long as the slice is kept around the garbage collector
// can't release the array.
//
// Here is the improvements:
// small payload - 0.13~ time faster, does 0.45~ less memory allocations but
// the total number of bytes that allocated is 0.03~ bigger
// medium payload - 0.16~ time faster, does 0.5~ less memory allocations but
// the total number of bytes that allocated is 0.05~ bigger
// large payload - 0.13~ time faster, does 0.50~ less memory allocations but
// the total number of bytes that allocated is 0.02~ bigger
//
// I don't know if it's worth it, let's wait for the community feedbacks and
// then I'll see where I go from there.
sdata: string(data),
end: len(data),
end: len(data),
}
}

// AllocString pre-allocates a string version of the input data before
// decoding starts.
//
// It speeds up decoding (see the numbers below) by converting the input
// bytes to a string once, and then "slicing" that string to produce
// non-escaped strings in the "Decoder.string" method.
// However, a string is a read-only slice, and since each slice references
// the original array, the garbage collector can't release the array for as
// long as any such slice is kept around.
// For this reason, use this method only when the Decoder's result is
// "read-only", or when you only add more elements to it. See the example
// below.
//
// Here are the improvements:
//
//	small payload  - 0.13~ time faster, does 0.45~ less memory allocations but
//	                 the total number of bytes that are allocated is 0.03~ bigger
//
//	medium payload - 0.16~ time faster, does 0.5~ less memory allocations but
//	                 the total number of bytes that are allocated is 0.05~ bigger
//
//	large payload  - 0.13~ time faster, does 0.50~ less memory allocations but
//	                 the total number of bytes that are allocated is 0.02~ bigger
//
// Here is an example that illustrates when you don't want to use this method:
//
//	str := fmt.Sprintf(`{"foo": "bar", "baz": "%s"}`, strings.Repeat("#", 1024 * 1024))
//	dec := djson.NewDecoder([]byte(str))
//	dec.AllocString()
//	ev, err := dec.DecodeObject()
//
//	// inspect memory stats here; MemStats.Alloc ~= 1M
//
//	delete(ev, "baz") // or ev["baz"] = "qux"
//
//	// inspect memory stats again; MemStats.Alloc ~= 1M
//	// it means that the chunk that sat in the "baz" value is not freed
func (d *Decoder) AllocString() {
	// Switch the decoder to string-slicing mode and take the one-off copy
	// of the input that the slices will be cut from.
	d.usestring = true
	d.sdata = string(d.data)
}

// Decode parses the JSON-encoded data held by the Decoder and returns it as
// an interface value. The dynamic type of the result is one of:
//
//	bool, for JSON booleans
//	float64, for JSON numbers
//	string, for JSON strings
//	[]interface{}, for JSON arrays
//	map[string]interface{}, for JSON objects
//	nil for JSON null
//
// Decode is compatible with the following instructions:
//
//	var v interface{}
//	err := json.Unmarshal(data, &v)
//
// An error is returned if the value fails to decode, or if any input
// remains after the top-level value once spaces have been skipped.
func (d *Decoder) Decode() (interface{}, error) {
	result, err := d.any()
	if err != nil {
		return nil, err
	}
	// Skip what follows the value; reaching the end of the input means
	// the whole document was consumed.
	trailing := d.skipSpaces()
	if d.pos >= d.end {
		return result, nil
	}
	return nil, d.error(trailing, "after top-level value")
}

// DecodeObject behaves like Decode, but requires the top-level value to be
// a JSON object and returns it as a map[string]interface{}.
// Use it when the input is known to be a JSON object.
func (d *Decoder) DecodeObject() (map[string]interface{}, error) {
	// The first byte after leading spaces must open an object.
	first := d.skipSpaces()
	if first != '{' {
		return nil, d.error(first, "looking for beginning of object")
	}
	obj, err := d.object()
	if err != nil {
		return nil, err
	}
	// Any input left after the object (spaces skipped) is an error.
	trailing := d.skipSpaces()
	if d.pos >= d.end {
		return obj, nil
	}
	return nil, d.error(trailing, "after top-level value")
}

// DecodeArray behaves like Decode, but requires the top-level value to be
// a JSON array and returns it as a []interface{}.
// Use it when the input is known to be a JSON array.
func (d *Decoder) DecodeArray() ([]interface{}, error) {
	// The first byte after leading spaces must open an array.
	first := d.skipSpaces()
	if first != '[' {
		return nil, d.error(first, "looking for beginning of array")
	}
	items, err := d.array()
	if err != nil {
		return nil, err
	}
	// Any input left after the array (spaces skipped) is an error.
	trailing := d.skipSpaces()
	if d.pos >= d.end {
		return items, nil
	}
	return nil, d.error(trailing, "after top-level value")
}

// any used to decode any valid JSON value, and returns an
// interface{} that holds the actual data
func (d *decoder) any() (interface{}, error) {
func (d *Decoder) any() (interface{}, error) {
switch c := d.skipSpaces(); c {
case '"':
return d.string()
Expand Down Expand Up @@ -88,7 +172,7 @@ func (d *decoder) any() (interface{}, error) {
}

// string called by `any` or `object`(for map keys) after reading `"`
func (d *decoder) string() (string, error) {
func (d *Decoder) string() (string, error) {
d.pos++

var (
Expand Down Expand Up @@ -117,7 +201,12 @@ scan:
}
s = string(data)
} else {
s = d.sdata[start:d.pos]
if d.usestring {
s = d.sdata[start:d.pos]
} else {

s = string(d.data[start:d.pos])
}
}
d.pos++
return s, nil
Expand Down Expand Up @@ -159,7 +248,7 @@ escape_u:
}

// number called by `any` after reading `-` or number between 0 to 9
func (d *decoder) number(neg bool) (float64, error) {
func (d *Decoder) number(neg bool) (float64, error) {
var (
n float64
c byte
Expand Down Expand Up @@ -214,11 +303,18 @@ func (d *decoder) number(neg bool) (float64, error) {
}

if isFloat {
v, err := strconv.ParseFloat(d.sdata[start:d.pos], 64)
if err != nil {
var (
err error
sn string
)
if d.usestring {
sn = d.sdata[start:d.pos]
} else {
sn = string(d.data[start:d.pos])
}
if n, err = strconv.ParseFloat(sn, 64); err != nil {
return 0, err
}
n = v
}
if neg {
return -n, nil
Expand All @@ -227,7 +323,7 @@ func (d *decoder) number(neg bool) (float64, error) {
}

// array accept valid JSON array value
func (d *decoder) array() ([]interface{}, error) {
func (d *Decoder) array() ([]interface{}, error) {
// the '[' token already scanned
d.pos++

Expand Down Expand Up @@ -266,7 +362,7 @@ out:
}

// object accept valid JSON array value
func (d *decoder) object() (map[string]interface{}, error) {
func (d *Decoder) object() (map[string]interface{}, error) {
// the '{' token already scanned
d.pos++

Expand Down Expand Up @@ -325,7 +421,7 @@ func (d *decoder) object() (map[string]interface{}, error) {
}

// next return the next byte in the input
func (d *decoder) next() byte {
func (d *Decoder) next() byte {
d.pos++
if d.pos < d.end {
return d.data[d.pos]
Expand All @@ -334,7 +430,7 @@ func (d *decoder) next() byte {
}

// returns the next char after white spaces
func (d *decoder) skipSpaces() byte {
func (d *Decoder) skipSpaces() byte {
loop:
if d.pos == d.end {
return 0
Expand All @@ -349,7 +445,7 @@ loop:
}

// emit sytax errors
func (d *decoder) error(c byte, context string) error {
func (d *Decoder) error(c byte, context string) error {
if d.pos < d.end {
return &SyntaxError{"invalid character " + quoteChar(c) + " " + context, d.pos + 1}
}
Expand Down
10 changes: 7 additions & 3 deletions interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func Type(v interface{}) ValueType {
// err := json.Unmarshal(data, &v)
//
func Decode(data []byte) (interface{}, error) {
d := newDecoder(data)
d := NewDecoder(data)
val, err := d.any()
if err != nil {
return nil, err
Expand All @@ -93,7 +93,7 @@ func Decode(data []byte) (interface{}, error) {
// DecodeObject is the same as Decode but it returns map[string]interface{}.
// You should use it to parse JSON objects.
func DecodeObject(data []byte) (map[string]interface{}, error) {
d := newDecoder(data)
d := NewDecoder(data)
if c := d.skipSpaces(); c != '{' {
return nil, d.error(c, "looking for beginning of object")
}
Expand All @@ -110,7 +110,7 @@ func DecodeObject(data []byte) (map[string]interface{}, error) {
// DecodeArray is the same as Decode but it returns []interface{}.
// You should use it to parse JSON arrays.
func DecodeArray(data []byte) ([]interface{}, error) {
d := newDecoder(data)
d := NewDecoder(data)
if c := d.skipSpaces(); c != '[' {
return nil, d.error(c, "looking for beginning of array")
}
Expand All @@ -123,3 +123,7 @@ func DecodeArray(data []byte) ([]interface{}, error) {
}
return val, nil
}

// TODO(a8m): the 3 methods above could be written like this:
//
// return NewDecoder(data).DecodeXXX()

0 comments on commit f489e3f

Please sign in to comment.