diff --git a/customfuncs/javascript.go b/customfuncs/javascript.go index ab815c8..e59969b 100644 --- a/customfuncs/javascript.go +++ b/customfuncs/javascript.go @@ -6,7 +6,6 @@ import ( "strconv" "strings" "sync" - "unsafe" "github.com/dop251/goja" "github.com/jf-tech/go-corelib/caches" @@ -113,10 +112,7 @@ func getNodeJSON(n *idr.Node) string { if disableCaching { return idr.JSONify2(n) } - // TODO if in the future we have *idr.Node allocation recycling, then this by-addr caching - // won't work. Ideally, we should have a node ID which refreshes upon recycling. - addr := strconv.FormatUint(uint64(uintptr(unsafe.Pointer(n))), 16) - j, _ := NodeToJSONCache.Get(addr, func(interface{}) (interface{}, error) { + j, _ := NodeToJSONCache.Get(n.ID, func(interface{}) (interface{}, error) { return idr.JSONify2(n), nil }) return j.(string) diff --git a/customfuncs/javascript_test.go b/customfuncs/javascript_test.go index a84b716..64353d1 100644 --- a/customfuncs/javascript_test.go +++ b/customfuncs/javascript_test.go @@ -223,12 +223,12 @@ func TestJavascriptClearVarsAfterRunProgram(t *testing.T) { } // go test -bench=. -benchmem -benchtime=30s -// BenchmarkIfElse-4 368017143 98.1 ns/op 8 B/op 1 allocs/op -// BenchmarkEval-4 26409430 1386 ns/op 418 B/op 8 allocs/op -// BenchmarkJavascriptWithNoCache-4 172803 210958 ns/op 136608 B/op 1698 allocs/op -// BenchmarkJavascriptWithCache-4 23059004 1572 ns/op 129 B/op 8 allocs/op -// BenchmarkConcurrentJavascriptWithNoCache-4 1140 32729941 ns/op 27328924 B/op 339654 allocs/op -// BenchmarkConcurrentJavascriptWithCache-4 70977 504870 ns/op 26568 B/op 1745 allocs/op +// BenchmarkIfElse-4 352522934 97.9 ns/op 8 B/op 1 allocs/op +// BenchmarkEval-4 25887961 1405 ns/op 418 B/op 8 allocs/op +// BenchmarkJavascriptWithNoCache-4 172342 210129 ns/op 136607 B/op 1698 allocs/op +// BenchmarkJavascriptWithCache-4 15996846 2062 ns/op 128 B/op 8 allocs/op +// BenchmarkConcurrentJavascriptWithNoCache-4 1143 33140598 ns/op 27328719 B/op 339653 allocs/op +// BenchmarkConcurrentJavascriptWithCache-4 65140 543091 ns/op 26608 B/op 1746 allocs/op var ( benchTitles = []string{"", "Dr", "Sir"} diff --git a/handlers/omni/v2/fileformat/csv/reader.go b/handlers/omni/v2/fileformat/csv/reader.go index dbb0a40..6c81181 100644 --- a/handlers/omni/v2/fileformat/csv/reader.go +++ b/handlers/omni/v2/fileformat/csv/reader.go @@ -125,6 +125,12 @@ func (r *reader) recordToNode(record []string) *idr.Node { return root } +func (r *reader) Release(n *idr.Node) { + if n != nil { + idr.RemoveAndReleaseTree(n) + } +} + func (r *reader) IsContinuableError(err error) bool { return !IsErrInvalidHeader(err) && err != io.EOF } diff --git a/handlers/omni/v2/fileformat/csv/reader_test.go b/handlers/omni/v2/fileformat/csv/reader_test.go index 3171990..09a41c0 100644 --- a/handlers/omni/v2/fileformat/csv/reader_test.go +++ b/handlers/omni/v2/fileformat/csv/reader_test.go @@ -165,6 +165,7 @@ func TestReader(t *testing.T) { expectedJSON, ok := test.expected[0].(string) assert.True(t, ok) assert.Equal(t, jsons.BPJ(expectedJSON), jsons.BPJ(idr.JSONify2(n))) + r.Release(n) test.expected = test.expected[1:] } }) diff --git a/handlers/omni/v2/fileformat/fileformat.go b/handlers/omni/v2/fileformat/fileformat.go index d63c88f..41aa673 100644 --- a/handlers/omni/v2/fileformat/fileformat.go +++ b/handlers/omni/v2/fileformat/fileformat.go @@ -28,6 +28,9 @@ type FormatReader interface { // Read returns a *Node and its subtree that will eventually be parsed and transformed into an // output record. If EOF has been reached, io.EOF must be returned. Read() (*idr.Node, error) + // Release gives the reader a chance to free resources of the *Node and its subtree that it returned + // to caller in a previous Read() call. + Release(*idr.Node) // IsContinuableError determines whether an FormatReader returned error is continuable or not. // For certain errors (like EOF or corruption) there is no point to keep on trying; while others // can be safely ignored. diff --git a/handlers/omni/v2/fileformat/json/reader.go b/handlers/omni/v2/fileformat/json/reader.go index 62d1119..c88b53f 100644 --- a/handlers/omni/v2/fileformat/json/reader.go +++ b/handlers/omni/v2/fileformat/json/reader.go @@ -40,6 +40,12 @@ func (r *reader) Read() (*idr.Node, error) { return n, nil } +func (r *reader) Release(n *idr.Node) { + if n != nil { + r.r.Release(n) + } +} + func (r *reader) IsContinuableError(err error) bool { return !IsErrNodeReadingFailed(err) && err != io.EOF } diff --git a/handlers/omni/v2/fileformat/json/reader_test.go b/handlers/omni/v2/fileformat/json/reader_test.go index b8389ae..1fe1c80 100644 --- a/handlers/omni/v2/fileformat/json/reader_test.go +++ b/handlers/omni/v2/fileformat/json/reader_test.go @@ -47,12 +47,15 @@ func TestReader_Read_Success(t *testing.T) { name, err := idr.MatchSingle(n, "name") assert.NoError(t, err) assert.Equal(t, "john", name.InnerText()) + // intentionally not calling r.Release(n) to verify that the + // stream node is freed up by a subsequent Read() call. n, err = r.Read() assert.NoError(t, err) name, err = idr.MatchSingle(n, "name") assert.NoError(t, err) assert.Equal(t, "jane", name.InnerText()) + r.Release(n) n, err = r.Read() assert.Error(t, err) diff --git a/handlers/omni/v2/fileformat/xml/reader.go b/handlers/omni/v2/fileformat/xml/reader.go index 47722fd..8991190 100644 --- a/handlers/omni/v2/fileformat/xml/reader.go +++ b/handlers/omni/v2/fileformat/xml/reader.go @@ -40,6 +40,12 @@ func (r *reader) Read() (*idr.Node, error) { return n, nil } +func (r *reader) Release(n *idr.Node) { + if n != nil { + r.r.Release(n) + } +} + func (r *reader) IsContinuableError(err error) bool { return !IsErrNodeReadingFailed(err) && err != io.EOF } diff --git a/handlers/omni/v2/fileformat/xml/reader_test.go b/handlers/omni/v2/fileformat/xml/reader_test.go index 07a17b5..0a4d887 100644 --- a/handlers/omni/v2/fileformat/xml/reader_test.go +++ b/handlers/omni/v2/fileformat/xml/reader_test.go @@ -24,6 +24,7 @@ func TestReader_Read_Success(t *testing.T) { 1 2 + 3 `), "Root/Node[. != '2']") assert.NoError(t, err) @@ -34,6 +35,14 @@ func TestReader_Read_Success(t *testing.T) { assert.Equal(t, "1", n.InnerText()) // xml.Decoder seems to keeps line at the end of whatever inside an element closing tag. assert.Equal(t, 3, r.r.AtLine()) + // intentionally not calling r.Release(n) to verify that the + // stream node is freed up by a subsequent Read() call. + + n, err = r.Read() + assert.NoError(t, err) + assert.Equal(t, "3", n.InnerText()) + assert.Equal(t, 5, r.r.AtLine()) + r.Release(n) n, err = r.Read() assert.Error(t, err) diff --git a/handlers/omni/v2/handler_test.go b/handlers/omni/v2/handler_test.go index a2143fb..dc7e6ec 100644 --- a/handlers/omni/v2/handler_test.go +++ b/handlers/omni/v2/handler_test.go @@ -52,6 +52,7 @@ type testFormatReader struct { } func (r testFormatReader) Read() (*idr.Node, error) { panic("implement me") } +func (r testFormatReader) Release(*idr.Node) { panic("implement me") } func (r testFormatReader) IsContinuableError(error) bool { panic("implement me") } func (r testFormatReader) FmtErr(string, ...interface{}) error { panic("implement me") } diff --git a/handlers/omni/v2/ingester.go b/handlers/omni/v2/ingester.go index 7a3ae7a..603310f 100644 --- a/handlers/omni/v2/ingester.go +++ b/handlers/omni/v2/ingester.go @@ -20,13 +20,14 @@ type ingester struct { } func (g *ingester) Read() ([]byte, error) { - node, err := g.reader.Read() + n, err := g.reader.Read() if err != nil { // Read() supposed to have already done CtxAwareErr error wrapping. So directly return. return nil, err } + defer g.reader.Release(n) result, err := transform.NewParseCtx( - g.ctx, g.customFuncs, g.customParseFuncs).ParseNode(node, g.finalOutputDecl) + g.ctx, g.customFuncs, g.customParseFuncs).ParseNode(n, g.finalOutputDecl) if err != nil { // ParseNode() error not CtxAwareErr wrapped, so wrap it. // Note errs.ErrorTransformFailed is a continuable error. diff --git a/handlers/omni/v2/ingester_test.go b/handlers/omni/v2/ingester_test.go index e8d8021..0740c61 100644 --- a/handlers/omni/v2/ingester_test.go +++ b/handlers/omni/v2/ingester_test.go @@ -14,10 +14,12 @@ import ( ) var errContinuableInTest = errors.New("continuable error") +var ingesterTestNode = idr.CreateNode(idr.ElementNode, "test") type testReader struct { - result []*idr.Node - err []error + result []*idr.Node + err []error + releaseCalled int } func (r *testReader) Read() (*idr.Node, error) { @@ -31,6 +33,8 @@ func (r *testReader) Read() (*idr.Node, error) { return result, err } +func (r *testReader) Release(n *idr.Node) { r.releaseCalled++ } + func (r *testReader) IsContinuableError(err error) bool { return err == errContinuableInTest } func (r *testReader) FmtErr(format string, args ...interface{}) error { @@ -45,6 +49,7 @@ func TestIngester_Read_ReadFailure(t *testing.T) { assert.Error(t, err) assert.Equal(t, "test failure", err.Error()) assert.Nil(t, b) + assert.Equal(t, 0, g.reader.(*testReader).releaseCalled) } func TestIngester_Read_ParseNodeFailure(t *testing.T) { @@ -57,7 +62,7 @@ func TestIngester_Read_ParseNodeFailure(t *testing.T) { assert.NoError(t, err) g := &ingester{ finalOutputDecl: finalOutputDecl, - reader: &testReader{result: []*idr.Node{nil}, err: []error{nil}}, + reader: &testReader{result: []*idr.Node{ingesterTestNode}, err: []error{nil}}, } b, err := g.Read() assert.Error(t, err) @@ -67,6 +72,7 @@ func TestIngester_Read_ParseNodeFailure(t *testing.T) { `ctx: fail to transform. err: fail to convert value 'abc' to type 'int' on 'FINAL_OUTPUT', err: strconv.ParseFloat: parsing "abc": invalid syntax`, err.Error()) assert.Nil(t, b) + assert.Equal(t, 1, g.reader.(*testReader).releaseCalled) } func TestIngester_Read_Success(t *testing.T) { @@ -79,11 +85,12 @@ func TestIngester_Read_Success(t *testing.T) { assert.NoError(t, err) g := &ingester{ finalOutputDecl: finalOutputDecl, - reader: &testReader{result: []*idr.Node{nil}, err: []error{nil}}, + reader: &testReader{result: []*idr.Node{ingesterTestNode}, err: []error{nil}}, } b, err := g.Read() assert.NoError(t, err) assert.Equal(t, "123", string(b)) + assert.Equal(t, 1, g.reader.(*testReader).releaseCalled) } func TestIsContinuableError(t *testing.T) { diff --git a/handlers/omni/v2/transform/parse.go b/handlers/omni/v2/transform/parse.go index 5283184..bdd29c2 100644 --- a/handlers/omni/v2/transform/parse.go +++ b/handlers/omni/v2/transform/parse.go @@ -6,7 +6,6 @@ import ( "reflect" "strconv" "strings" - "unsafe" "github.com/jf-tech/go-corelib/strs" @@ -37,11 +36,6 @@ func NewParseCtx( } } -func nodePtrAddrStr(n *idr.Node) string { - // `uintptr` is faster than `fmt.Sprintf("%p"...)` - return strconv.FormatUint(uint64(uintptr(unsafe.Pointer(n))), 16) -} - func resultTypeConversion(decl *Decl, value string) (interface{}, error) { if decl.resultType() == ResultTypeString { return value, nil @@ -131,9 +125,7 @@ func normalizeAndReturnValue(decl *Decl, value interface{}) (interface{}, error) func (p *parseCtx) ParseNode(n *idr.Node, decl *Decl) (interface{}, error) { var cacheKey string if !p.disableTransformCache { - // TODO if in the future we have *idr.Node allocation recycling, then this by-addr caching won't work. - // Ideally, we should have a node ID which refreshes upon recycling. - cacheKey = nodePtrAddrStr(n) + "/" + decl.hash + cacheKey = strconv.FormatInt(n.ID, 16) + "/" + decl.hash if cacheValue, found := p.transformCache[cacheKey]; found { return cacheValue, nil } diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last b/idr/.snapshots/TestRemoveAndReleaseTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last similarity index 100% rename from idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last rename to idr/.snapshots/TestRemoveAndReleaseTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first b/idr/.snapshots/TestRemoveAndReleaseTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first similarity index 100% rename from idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first rename to idr/.snapshots/TestRemoveAndReleaseTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last b/idr/.snapshots/TestRemoveAndReleaseTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last similarity index 100% rename from idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last rename to idr/.snapshots/TestRemoveAndReleaseTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_only_child b/idr/.snapshots/TestRemoveAndReleaseTree-remove_a_node_who_is_its_parents_only_child similarity index 100% rename from idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_only_child rename to idr/.snapshots/TestRemoveAndReleaseTree-remove_a_node_who_is_its_parents_only_child diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_root_does_nothing b/idr/.snapshots/TestRemoveAndReleaseTree-remove_a_root_does_nothing_(when_node_caching_is_off) similarity index 100% rename from idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_root_does_nothing rename to idr/.snapshots/TestRemoveAndReleaseTree-remove_a_root_does_nothing_(when_node_caching_is_off) diff --git a/idr/jsonreader.go b/idr/jsonreader.go index 08f2ea1..34fe8b2 100644 --- a/idr/jsonreader.go +++ b/idr/jsonreader.go @@ -48,7 +48,7 @@ func (sp *JSONStreamReader) wrapUpCurAndTargetCheck() *Node { // node fully and discovered sp.xpathFilterExpr can't be satisfied, so this // sp.stream isn't a target. To prevent future mismatch for other stream candidate, // we need to remove it from Node tree completely. And reset sp.stream. - RemoveFromTree(sp.stream) + RemoveAndReleaseTree(sp.stream) sp.stream = nil return nil } @@ -192,14 +192,25 @@ func (sp *JSONStreamReader) parse() (*Node, error) { // Read returns a *Node that matches the xpath streaming criteria. func (sp *JSONStreamReader) Read() (*Node, error) { // Because this is a streaming read, we need to release/remove last - // stream node from the node tree to free up memory. + // stream node from the node tree to free up memory. If Release() is + // called after Read() call, then sp.stream is already cleaned up; + // adding this piece of code here just in case Release() isn't called. if sp.stream != nil { - RemoveFromTree(sp.stream) + RemoveAndReleaseTree(sp.stream) sp.stream = nil } return sp.parse() } +// Release releases the *Node (and its subtree) that Read() has previously +// returned. +func (sp *JSONStreamReader) Release(n *Node) { + if n == sp.stream { + sp.stream = nil + } + RemoveAndReleaseTree(n) +} + // AtLine returns the **rough** line number of the current JSON decoder. func (sp *JSONStreamReader) AtLine() int { return sp.r.AtLine() diff --git a/idr/jsonreader_test.go b/idr/jsonreader_test.go index fddc797..8ae2613 100644 --- a/idr/jsonreader_test.go +++ b/idr/jsonreader_test.go @@ -119,6 +119,7 @@ func TestJSONStreamReader(t *testing.T) { assert.NoError(t, err) assert.True(t, len(test.expected) > 0) assert.Equal(t, test.expected[0], JSONify2(n)) + sp.Release(n) test.expected = test.expected[1:] } assert.Equal(t, 2, sp.AtLine()) diff --git a/idr/navigator_test.go b/idr/navigator_test.go index 0aca245..e5c03f7 100644 --- a/idr/navigator_test.go +++ b/idr/navigator_test.go @@ -9,6 +9,7 @@ import ( ) func navTestSetup(t *testing.T) (*testTree, *navigator, func(*Node)) { + setupTestNodeCaching(testNodeCachingOff) tt := newTestTree(t, testTreeXML) nav := createNavigator(tt.root) moveTo := func(n *Node) { nav.cur = n } @@ -120,8 +121,8 @@ func TestMoveToChild(t *testing.T) { moveTo(tt.attrC1) assert.False(t, nav.MoveToChild()) // remove elemC3/elemC4, so elemC now only has attrC1 and attrC2 - RemoveFromTree(tt.elemC3) - RemoveFromTree(tt.elemC4) + RemoveAndReleaseTree(tt.elemC3) + RemoveAndReleaseTree(tt.elemC4) moveTo(tt.elemC) assert.False(t, nav.MoveToChild()) moveTo(tt.elemB) diff --git a/idr/node.go b/idr/node.go index 1d66b20..f0757b8 100644 --- a/idr/node.go +++ b/idr/node.go @@ -3,6 +3,8 @@ package idr import ( "fmt" "strings" + "sync" + "sync/atomic" ) // NodeType is the type of a Node in an IDR. @@ -44,6 +46,12 @@ func (nt NodeType) String() string { // for each format. // - Node allocation recycling. type Node struct { + // ID uniquely identifies a Node, whether it's newly created or recycled and reused from + // the node allocation cache. Previously we sometimes used a *Node's pointer address as a + // unique ID which isn't sufficiently unique anymore given the introduction of using + // sync.Pool for node allocation caching. + ID int64 + Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node Type NodeType @@ -52,12 +60,55 @@ type Node struct { FormatSpecific interface{} } +// Give test a chance to turn node caching on/off. Not exported; always caching in production code. +var nodeCaching = true +var nodePool sync.Pool + +func allocNode() *Node { + n := &Node{} + n.reset() + return n +} + +func resetNodePool() { + nodePool = sync.Pool{ + New: func() interface{} { + return allocNode() + }, + } +} + +func init() { + resetNodePool() +} + // CreateNode creates a generic *Node. func CreateNode(ntype NodeType, data string) *Node { - return &Node{ - Type: ntype, - Data: data, + if nodeCaching { + // Node out of pool has already been reset. + n := nodePool.Get().(*Node) + n.Type = ntype + n.Data = data + return n } + n := allocNode() + n.Type = ntype + n.Data = data + return n +} + +var nodeID = int64(0) + +func newNodeID() int64 { + return atomic.AddInt64(&nodeID, 1) +} + +func (n *Node) reset() { + n.ID = newNodeID() + n.Parent, n.FirstChild, n.LastChild, n.PrevSibling, n.NextSibling = nil, nil, nil, nil, nil + n.Type = 0 + n.Data = "" + n.FormatSpecific = nil } // InnerText returns a Node's children's texts concatenated. @@ -95,11 +146,11 @@ func AddChild(parent, n *Node) { parent.LastChild = n } -// RemoveFromTree removes a node and its subtree from an IDR -// tree it is in. If the node is the root of the tree, it's a no-op. -func RemoveFromTree(n *Node) { +// RemoveAndReleaseTree removes a node and its subtree from an IDR tree it is in and +// release the resources (Node allocation) associated with the node and its subtree. +func RemoveAndReleaseTree(n *Node) { if n.Parent == nil { - return + goto recycle } if n.Parent.FirstChild == n { if n.Parent.LastChild == n { @@ -118,7 +169,21 @@ func RemoveFromTree(n *Node) { n.NextSibling.PrevSibling = n.PrevSibling } } - n.Parent = nil - n.PrevSibling = nil - n.NextSibling = nil +recycle: + recycle(n) +} + +func recycle(n *Node) { + if !nodeCaching { + return + } + for c := n.FirstChild; c != nil; { + // Have to save c.NextSibling before recycle(c) call or + // c.NextSibling would be wiped out during the call. + next := c.NextSibling + recycle(c) + c = next + } + n.reset() + nodePool.Put(n) } diff --git a/idr/node_test.go b/idr/node_test.go index 81d7fb4..2db73ec 100644 --- a/idr/node_test.go +++ b/idr/node_test.go @@ -21,37 +21,37 @@ func rootOf(n *Node) *Node { return n } -func checkPointersInTree(t *testing.T, n *Node) { +func checkPointersInTree(tb testing.TB, n *Node) { if n == nil { return } if n.FirstChild != nil { - assert.True(t, n == n.FirstChild.Parent) + assert.True(tb, n == n.FirstChild.Parent) } if n.LastChild != nil { - assert.True(t, n == n.LastChild.Parent) + assert.True(tb, n == n.LastChild.Parent) } - checkPointersInTree(t, n.FirstChild) - // There is no need to call checkPointersInTree(t, n.LastChild) - // because checkPointersInTree(t, n.FirstChild) will traverse all its + checkPointersInTree(tb, n.FirstChild) + // There is no need to call checkPointersInTree(tb, n.LastChild) + // because checkPointersInTree(tb, n.FirstChild) will traverse all its // siblings to the end, and if the last one isn't n.LastChild then it will fail. parent := n.Parent // could be nil if n is the root of a tree. // Verify the PrevSibling chain cur, prev := n, n.PrevSibling for ; prev != nil; cur, prev = prev, prev.PrevSibling { - assert.True(t, prev.Parent == parent) - assert.True(t, prev.NextSibling == cur) + assert.True(tb, prev.Parent == parent) + assert.True(tb, prev.NextSibling == cur) } - assert.True(t, cur.PrevSibling == nil) - assert.True(t, parent == nil || parent.FirstChild == cur) + assert.True(tb, cur.PrevSibling == nil) + assert.True(tb, parent == nil || parent.FirstChild == cur) // Verify the NextSibling chain cur, next := n, n.NextSibling for ; next != nil; cur, next = next, next.NextSibling { - assert.True(t, next.Parent == parent) - assert.True(t, next.PrevSibling == cur) + assert.True(tb, next.Parent == parent) + assert.True(tb, next.PrevSibling == cur) } - assert.True(t, cur.NextSibling == nil) - assert.True(t, parent == nil || parent.LastChild == cur) + assert.True(tb, cur.NextSibling == nil) + assert.True(tb, parent == nil || parent.LastChild == cur) } type testTree struct { @@ -81,7 +81,7 @@ const ( testTreeNotXML = false ) -func newTestTree(t *testing.T, xmlNode bool) *testTree { +func newTestTree(tb testing.TB, xmlNode bool) *testTree { mkNode := func(parent *Node, ntype NodeType, name string) *Node { var node *Node if xmlNode { @@ -115,7 +115,7 @@ func newTestTree(t *testing.T, xmlNode bool) *testTree { elemC3, textC3 := mkPair(elemC, ElementNode, "elemC3", "textC3") elemC4, textC4 := mkPair(elemC, ElementNode, "elemC4", "textC4") - checkPointersInTree(t, root) + checkPointersInTree(tb, root) return &testTree{ root: root, @@ -135,50 +135,87 @@ func newTestTree(t *testing.T, xmlNode bool) *testTree { } } +const ( + testNodeCachingOn = true + testNodeCachingOff = false +) + +func setupTestNodeCaching(caching bool) { + resetNodePool() + nodeCaching = caching +} + func TestDumpTestTree(t *testing.T) { + setupTestNodeCaching(testNodeCachingOff) cupaloy.SnapshotT(t, JSONify1(newTestTree(t, testTreeXML).root)) } func TestInnerText(t *testing.T) { + setupTestNodeCaching(testNodeCachingOff) tt := newTestTree(t, testTreeNotXML) assert.Equal(t, tt.textA1.Data+tt.textA2.Data, tt.elemA.InnerText()) // Note attribute's texts are skipped in InnerText(), by design. assert.Equal(t, tt.textC3.Data+tt.textC4.Data, tt.elemC.InnerText()) } -func TestRemoveNodeAndSubTree(t *testing.T) { +func TestRemoveAndReleaseTree(t *testing.T) { t.Run("remove a node who is its parents only child", func(t *testing.T) { + setupTestNodeCaching(testNodeCachingOn) tt := newTestTree(t, testTreeXML) - RemoveFromTree(tt.elemB1) + RemoveAndReleaseTree(tt.elemB1) checkPointersInTree(t, tt.root) cupaloy.SnapshotT(t, JSONify1(tt.root)) }) t.Run("remove a node who is its parents first child but not the last", func(t *testing.T) { + setupTestNodeCaching(testNodeCachingOn) tt := newTestTree(t, testTreeXML) - RemoveFromTree(tt.elemA) + RemoveAndReleaseTree(tt.elemA) checkPointersInTree(t, tt.root) cupaloy.SnapshotT(t, JSONify1(tt.root)) }) t.Run("remove a node who is its parents middle child not the first not the last", func(t *testing.T) { + setupTestNodeCaching(testNodeCachingOn) tt := newTestTree(t, testTreeXML) - RemoveFromTree(tt.elemB) + RemoveAndReleaseTree(tt.elemB) checkPointersInTree(t, tt.root) cupaloy.SnapshotT(t, JSONify1(tt.root)) }) t.Run("remove a node who is its parents last child but not the first", func(t *testing.T) { + setupTestNodeCaching(testNodeCachingOn) tt := newTestTree(t, testTreeXML) - RemoveFromTree(tt.elemC) + RemoveAndReleaseTree(tt.elemC) checkPointersInTree(t, tt.root) cupaloy.SnapshotT(t, JSONify1(tt.root)) }) - t.Run("remove a root does nothing", func(t *testing.T) { + t.Run("remove a root does nothing (when node caching is off)", func(t *testing.T) { + setupTestNodeCaching(testNodeCachingOff) tt := newTestTree(t, testTreeXML) - RemoveFromTree(tt.root) + RemoveAndReleaseTree(tt.root) checkPointersInTree(t, tt.root) cupaloy.SnapshotT(t, JSONify1(tt.root)) }) } + +// go test -bench=. -benchmem -benchtime=30s +// BenchmarkCreateAndDestroyTree_NoCache-4 20421031 1736 ns/op 1872 B/op 19 allocs/op +// BenchmarkCreateAndDestroyTree_WithCache-4 22744428 1559 ns/op 144 B/op 1 allocs/op + +func BenchmarkCreateAndDestroyTree_NoCache(b *testing.B) { + setupTestNodeCaching(testNodeCachingOff) + for i := 0; i < b.N; i++ { + tt := newTestTree(b, false) + RemoveAndReleaseTree(tt.root) + } +} + +func BenchmarkCreateAndDestroyTree_WithCache(b *testing.B) { + setupTestNodeCaching(testNodeCachingOn) + for i := 0; i < b.N; i++ { + tt := newTestTree(b, false) + RemoveAndReleaseTree(tt.root) + } +} diff --git a/idr/xmlreader.go b/idr/xmlreader.go index a6ac69d..88843ed 100644 --- a/idr/xmlreader.go +++ b/idr/xmlreader.go @@ -49,7 +49,7 @@ func (sp *XMLStreamReader) wrapUpCurAndTargetCheck() *Node { // discovered sp.xpathFilterExpr can't be satisfied, so this sp.stream isn't a // stream target. To prevent future mismatch for other stream candidate, we need to // remove it from Node tree completely. And reset sp.stream. - RemoveFromTree(sp.stream) + RemoveAndReleaseTree(sp.stream) sp.stream = nil return nil } @@ -167,15 +167,26 @@ func (sp *XMLStreamReader) Read() (n *Node, err error) { return nil, sp.err } // Because this is a streaming read, we need to remove the last - // stream node from the node tree to free up memory. + // stream node from the node tree to free up memory. If Release() + // is called after Read() call, then sp.stream is already cleaned up; + // adding this piece of code here just in case Release() isn't called. if sp.stream != nil { - RemoveFromTree(sp.stream) + RemoveAndReleaseTree(sp.stream) sp.stream = nil } n, sp.err = sp.parse() return n, sp.err } +// Release releases the *Node (and its subtree) that Read() has previously +// returned. +func (sp *XMLStreamReader) Release(n *Node) { + if n == sp.stream { + sp.stream = nil + } + RemoveAndReleaseTree(n) +} + // AtLine returns the **rough** line number of the current XML decoder. func (sp *XMLStreamReader) AtLine() int { // Given all the libraries are of fixed versions in go modules, we're fine. diff --git a/idr/xmlreader_test.go b/idr/xmlreader_test.go index 503feff..d420470 100644 --- a/idr/xmlreader_test.go +++ b/idr/xmlreader_test.go @@ -43,6 +43,8 @@ func TestXMLStreamReader_SuccessWithXPathWithFilter(t *testing.T) { t.Run("IDR snapshot after 1st Read", func(t *testing.T) { cupaloy.SnapshotT(t, JSONify1(rootOf(n))) }) + // intentionally not calling sp.Release() to see if subsequent sp.Read() + // call would do the right thing and free the stream node or not. assert.Equal(t, 5, sp.AtLine()) // Second `` read @@ -52,6 +54,7 @@ func TestXMLStreamReader_SuccessWithXPathWithFilter(t *testing.T) { t.Run("IDR snapshot after 2nd Read", func(t *testing.T) { cupaloy.SnapshotT(t, JSONify1(rootOf(n))) }) + sp.Release(n) assert.Equal(t, 7, sp.AtLine()) // Third `` read (Note we will skip 'b3' since the streamElementFilter excludes it) @@ -61,6 +64,7 @@ func TestXMLStreamReader_SuccessWithXPathWithFilter(t *testing.T) { t.Run("IDR snapshot after 3rd Read", func(t *testing.T) { cupaloy.SnapshotT(t, JSONify1(rootOf(n))) }) + sp.Release(n) assert.Equal(t, 11, sp.AtLine()) n, err = sp.Read() @@ -104,6 +108,7 @@ func TestXMLStreamReader_SuccessWithXPathWithoutFilter(t *testing.T) { t.Run("IDR snapshot after 2nd Read", func(t *testing.T) { cupaloy.SnapshotT(t, JSONify1(rootOf(n))) }) + sp.Release(n) assert.Equal(t, 7, sp.AtLine()) // Third `` read @@ -113,6 +118,7 @@ func TestXMLStreamReader_SuccessWithXPathWithoutFilter(t *testing.T) { t.Run("IDR snapshot after 3rd Read", func(t *testing.T) { cupaloy.SnapshotT(t, JSONify1(rootOf(n))) }) + sp.Release(n) assert.Equal(t, 8, sp.AtLine()) // Fourth `` read @@ -122,6 +128,7 @@ func TestXMLStreamReader_SuccessWithXPathWithoutFilter(t *testing.T) { t.Run("IDR snapshot after 4th Read", func(t *testing.T) { cupaloy.SnapshotT(t, JSONify1(rootOf(n))) }) + sp.Release(n) assert.Equal(t, 11, sp.AtLine()) n, err = sp.Read() diff --git a/samples/omniv2/customfileformats/jsonlog/jsonlogformat/reader.go b/samples/omniv2/customfileformats/jsonlog/jsonlogformat/reader.go index 01f530b..aabb556 100644 --- a/samples/omniv2/customfileformats/jsonlog/jsonlogformat/reader.go +++ b/samples/omniv2/customfileformats/jsonlog/jsonlogformat/reader.go @@ -54,7 +54,11 @@ func (r *reader) Read() (*idr.Node, error) { if !strs.IsStrNonBlank(l) { continue } - n, err := parseJSON([]byte(l)) + p, err := idr.NewJSONStreamReader(bytes.NewReader([]byte(l)), ".") + if err != nil { + return nil, err + } + n, err := p.Read() if err != nil { // If we read out a log line fine, but unable to parse it, that shouldn't be // a fatal error, thus not wrapping the error in non-continuable error @@ -70,12 +74,10 @@ func (r *reader) Read() (*idr.Node, error) { } } -func parseJSON(b []byte) (*idr.Node, error) { - p, err := idr.NewJSONStreamReader(bytes.NewReader(b), ".") - if err != nil { - return nil, err +func (r *reader) Release(n *idr.Node) { + if n != nil { + idr.RemoveAndReleaseTree(n) } - return p.Read() } func (r *reader) IsContinuableError(err error) bool { diff --git a/samples/omniv2/json/json_test.go b/samples/omniv2/json/json_test.go index daf7835..47ab3f5 100644 --- a/samples/omniv2/json/json_test.go +++ b/samples/omniv2/json/json_test.go @@ -49,7 +49,7 @@ func init() { } // go test -bench=. -benchmem -benchtime=30s -// Benchmark2_Multiple_Objects-4 95299 377800 ns/op 99312 B/op 2351 allocs/op +// Benchmark2_Multiple_Objects-4 95775 381437 ns/op 84870 B/op 2180 allocs/op func Benchmark2_Multiple_Objects(b *testing.B) { for i := 0; i < b.N; i++ {