From cbada2133a19e33bf02a5fe26e95cf8656221d34 Mon Sep 17 00:00:00 2001 From: jf-tech Date: Wed, 30 Sep 2020 17:56:36 +1300 Subject: [PATCH 1/6] Switch JS runtime caching to a resource pool Just realized a problem, currently we use a loading cache for goja.Runtime caching keyed by ctx. However that means the caching is per transform. We had scenarios in the past where a service handles many concurrent requests, each of which contains a tiny payload that needs to be parsed and transformed. In that situation, the goja.Runtime caching is completely ineffective. Instead, we want to share goja.Runtime across different transforms, as long as javascripts are not called at the same time. Thus the introduction of resource pool. We will adaptively create a number of goja.Runtime in the resource pool, and each of the transforms, concurrent or not, requests a runtime from the pool. We don't want to block, so if there is no available runtime in the pool, while the pool will create a new runtime, it will immediately fail and return. The new execProgram will fallback and create a new runtime. The hope is the resource pool eventually grow to certain size that any concurrent requests would be satisfied. Extensive benchmarks created and verified the new design. With this, now javascript is truly performant enough to fully replace eval. --- customfuncs/javascript.go | 136 ++++++++++++++++++++------------- customfuncs/javascript_test.go | 108 +++++++++++++++++++++++--- go.mod | 2 + go.sum | 4 + 4 files changed, 188 insertions(+), 62 deletions(-) diff --git a/customfuncs/javascript.go b/customfuncs/javascript.go index a954c06..73553c7 100644 --- a/customfuncs/javascript.go +++ b/customfuncs/javascript.go @@ -1,6 +1,7 @@ package customfuncs import ( + "context" "errors" "fmt" "strconv" @@ -11,6 +12,7 @@ import ( "github.com/dop251/goja" "github.com/jf-tech/go-corelib/caches" "github.com/jf-tech/go-corelib/strs" + pool "github.com/jolestar/go-commons-pool" "github.com/jf-tech/omniparser/nodes" "github.com/jf-tech/omniparser/transformctx" @@ -66,26 +68,56 @@ func parseArgTypeAndValue(argDecl, argValue string) (name string, value interfac // For debugging/testing purpose so we can easily disable all the caches. But not exported. We always // want caching in production. -var disableCache = false +var disableCaching = false + +func resetCaches() { + // per schema so won't have too many, no need to put a small cap (default is 64k) + JSProgramCache = caches.NewLoadingCache() + JSRuntimePool = pool.NewObjectPool( + context.Background(), + pool.NewPooledObjectFactorySimple( + func(context.Context) (interface{}, error) { + return goja.New(), nil + }), + func() *pool.ObjectPoolConfig { + cfg := pool.NewDefaultPoolConfig() + cfg.MaxTotal = -1 // no limit + cfg.MaxIdle = -1 // no limit + // keep a minimum 10 idling VMs always around to reduce startup latency. + cfg.MinIdle = 10 + cfg.TimeBetweenEvictionRuns = cfg.MinEvictableIdleTime // turn on eviction + // we don't ever want to block - we'd rather run slowly but let transform continue. So if + // pool is exhausted, just fail BorrowObject call and we'll create a new vm runtime (slowly) + // and continue. + cfg.BlockWhenExhausted = false + return cfg + }()) + // per transform, plus expensive, a smaller cap. + NodeToJSONCache = caches.NewLoadingCache(100) +} // JSProgramCache caches *goja.Program. A *goja.Program is compiled javascript and it can be used -// across multiple goroutines and across different *goja.Runtime. -var JSProgramCache = caches.NewLoadingCache() // per schema so won't have too many, no need to put a hard cap. -// JSRuntimeCache caches *goja.Runtime. A *goja.Runtime is a javascript VM. It can *not* be shared -// across multiple goroutines. -// TODO still not gonna work well in scenario where a service handles many requests, each of which does a simple -// transform (that only deals with one record) involving a javascript custom_func. We had use case where each input -// was a small json and a transform and ctx is created for each input and does a fast/simple transform. We probably -// need to consider a workerpool for JS runtime so it can be shared globally. Consider using -// https://github.com/gammazero/workerpool/ -var JSRuntimeCache = caches.NewLoadingCache(100) // per transform, plus expensive, a smaller cap. -// NodeToJSONCache caches *node.Node tree to translated JSON string. -// TODO if in the future we have *node.Node allocation recycling, then this by-addr caching won't work. -// Ideally, we should have a node ID which refreshes upon recycling. -var NodeToJSONCache = caches.NewLoadingCache(100) // per transform, plus expensive, a smaller cap. +// across multiple goroutines and across different *goja.Runtime. If default loading cache capacity +// is not desirable, change JSProgramCache to a loading cache with a different capacity at package +// init time. Be mindful this will be shared across all use cases inside your process. +var JSProgramCache *caches.LoadingCache + +// JSRuntimePool caches *goja.Runtime whose creation is expensive such that we want to have a pool +// of them to amortize the initialization cost. However, a *goja.Runtime cannot be used by two/more +// javascript's at the same time, thus the use of resource pool. If the default pool configuration is +// not desirable, change JSRuntimePool with a different config during package init time. Be mindful +// this will be shared across all use cases inside your process. +var JSRuntimePool *pool.ObjectPool + +// NodeToJSONCache caches *node.Node to JSON translations. +var NodeToJSONCache *caches.LoadingCache + +func init() { + resetCaches() +} func getProgram(js string) (*goja.Program, error) { - if disableCache { + if disableCaching { return goja.Compile("", js, false) } p, err := JSProgramCache.Get(js, func(interface{}) (interface{}, error) { @@ -97,39 +129,48 @@ func getProgram(js string) (*goja.Program, error) { return p.(*goja.Program), nil } -func ptrAddrStr(p unsafe.Pointer) string { - return strconv.FormatUint(uint64(uintptr(p)), 16) -} - -func getRuntime(ctx *transformctx.Ctx) *goja.Runtime { - if disableCache { - return goja.New() - } - // a VM can be reused as long as not across thread. We don't have access to - // thread/goroutine id (nor do we want to use some hack to get it, see - // https://golang.org/doc/faq#no_goroutine_id). Instead, we use ctx as an - // indicator - omniparser runs on a single thread per transform. And ctx is - // is per transform. - addr := ptrAddrStr(unsafe.Pointer(ctx)) - vm, _ := JSRuntimeCache.Get(addr, func(interface{}) (interface{}, error) { - return goja.New(), nil - }) - return vm.(*goja.Runtime) -} - func getNodeJSON(n *node.Node) string { - if disableCache { + if disableCaching { return nodes.JSONify2(n) } - addr := ptrAddrStr(unsafe.Pointer(n)) + // TODO if in the future we have *node.Node allocation recycling, then this by-addr caching + // won't work. Ideally, we should have a node ID which refreshes upon recycling. + addr := strconv.FormatUint(uint64(uintptr(unsafe.Pointer(n))), 16) j, _ := NodeToJSONCache.Get(addr, func(interface{}) (interface{}, error) { return nodes.JSONify2(n), nil }) return j.(string) } +func execProgram(program *goja.Program, args map[string]interface{}) (goja.Value, error) { + var vm *goja.Runtime + if disableCaching { + vm = goja.New() + } else { + poolObj, err := JSRuntimePool.BorrowObject(context.Background()) + if err != nil { + vm = goja.New() + } else { + vm = poolObj.(*goja.Runtime) + } + } + defer func() { + if vm != nil { + // wipe out all the args (by setting them to undefined) in prep for next exec. + for arg := range args { + vm.Set(arg, goja.Undefined()) + } + _ = JSRuntimePool.ReturnObject(context.Background(), vm) + } + }() + for arg, val := range args { + vm.Set(arg, val) + } + return vm.RunProgram(program) +} + // javascriptWithContext is a custom_func that runs a javascript with optional arguments and -// with current node JSON, if the context node is provided. +// with current node JSON, if node is provided. func javascriptWithContext(ctx *transformctx.Ctx, n *node.Node, js string, args ...string) (string, error) { if len(args)%2 != 0 { return "", errors.New("invalid number of args to 'javascript'") @@ -138,25 +179,18 @@ func javascriptWithContext(ctx *transformctx.Ctx, n *node.Node, js string, args if err != nil { return "", fmt.Errorf("invalid javascript: %s", err.Error()) } - runtime := getRuntime(ctx) - var varnames []string - defer func() { - for i := range varnames { - runtime.Set(varnames[i], goja.Undefined()) - } - }() + vmArgs := make(map[string]interface{}) for i := 0; i < len(args)/2; i++ { - varname, val, err := parseArgTypeAndValue(args[i*2], args[i*2+1]) + argName, val, err := parseArgTypeAndValue(args[i*2], args[i*2+1]) if err != nil { return "", err } - runtime.Set(varname, val) - varnames = append(varnames, varname) + vmArgs[argName] = val } if n != nil { - runtime.Set(argNameNode, getNodeJSON(n)) + vmArgs[argNameNode] = getNodeJSON(n) } - v, err := runtime.RunProgram(program) + v, err := execProgram(program, vmArgs) if err != nil { return "", err } diff --git a/customfuncs/javascript_test.go b/customfuncs/javascript_test.go index 6a5e041..2baaa9f 100644 --- a/customfuncs/javascript_test.go +++ b/customfuncs/javascript_test.go @@ -1,9 +1,13 @@ package customfuncs import ( + "context" + "errors" "strings" + "sync" "testing" + pool "github.com/jolestar/go-commons-pool" "github.com/stretchr/testify/assert" "github.com/jf-tech/omniparser/nodes" @@ -68,6 +72,16 @@ func TestParseArgTypeAndValue(t *testing.T) { } } +const ( + noCache = false + withCache = true +) + +func prepCachesForTest(cache bool) { + disableCaching = !cache + resetCaches() +} + func TestJavascript(t *testing.T) { sp, err := nodes.NewJSONStreamReader(strings.NewReader(` { @@ -184,24 +198,46 @@ func TestJavascript(t *testing.T) { } } t.Run(test.name+" (without cache)", func(t *testing.T) { - disableCache = true + prepCachesForTest(noCache) testFn(t) }) t.Run(test.name+" (with cache)", func(t *testing.T) { - disableCache = false + prepCachesForTest(withCache) + resetCaches() testFn(t) }) } } +func TestJSRuntimePoolFailAndFallback(t *testing.T) { + prepCachesForTest(withCache) + JSRuntimePool = pool.NewObjectPoolWithDefaultConfig( + context.Background(), + pool.NewPooledObjectFactorySimple( + func(context.Context) (interface{}, error) { + return nil, errors.New("factory failure") + })) + assertPoolSize := func(size int) { + assert.Equal(t, size, JSRuntimePool.GetNumActive()+JSRuntimePool.GetNumIdle()) + } + assertPoolSize(0) + // The runtime factory will fail but javascript call will still succeed with + // an ad-hoc vm runtime created and no vm runtime added into the pool. + r, err := javascript(nil, "1 + 2") + assert.NoError(t, err) + assert.Equal(t, "3", r) + assertPoolSize(0) +} + func TestJavascriptClearVarsAfterRunProgram(t *testing.T) { + prepCachesForTest(noCache) r, err := javascript(nil, `v1 + v2`, "v1:int", "1", "v2:int", "2") assert.NoError(t, err) assert.Equal(t, "3", r) // Note v1 should be cleared before second run. r, err = javascript(nil, `v3 + v4 + v1`, "v3:int", "10", "v4:int", "20") assert.Error(t, err) - assert.Equal(t, "result is NaN", err.Error()) + assert.Equal(t, `ReferenceError: v1 is not defined at :1:11(3)`, err.Error()) assert.Equal(t, "", r) // Run again without using v1. r, err = javascript(nil, `v3 + v4`, "v3:int", "10", "v4:int", "20") @@ -210,10 +246,17 @@ func TestJavascriptClearVarsAfterRunProgram(t *testing.T) { } // go test -bench=. -benchmem -benchtime=30s -// BenchmarkIfElse-4 234978459 152 ns/op 69 B/op 1 allocs/op -// BenchmarkEval-4 19715643 1871 ns/op 576 B/op 11 allocs/op -// BenchmarkJavascriptWithNoCache-4 165547 218455 ns/op 136733 B/op 1704 allocs/op -// BenchmarkJavascriptWithCache-4 17685051 2047 ns/op 272 B/op 15 allocs/op +// BenchmarkIfElse-4 218432557 156 ns/op 69 B/op 1 allocs/op +// BenchmarkEval-4 19546273 1876 ns/op 576 B/op 11 allocs/op +// BenchmarkJavascriptWithNoCache-4 164118 221029 ns/op 136686 B/op 1701 allocs/op +// BenchmarkJavascriptWithCache-4 13124720 2789 ns/op 224 B/op 11 allocs/op +// BenchmarkConcurrentJavascriptWithNoCache-4 1099 33752282 ns/op 27344894 B/op 340254 allocs/op +// BenchmarkConcurrentJavascriptWithCache-4 51764 698640 ns/op 44879 B/op 2335 allocs/op +// --- BENCH: BenchmarkConcurrentJavascriptWithCache-4 +// javascript_test.go:344: pool size: 4 +// javascript_test.go:344: pool size: 5 +// javascript_test.go:344: pool size: 47 +// javascript_test.go:344: pool size: 68 var ( benchTitles = []string{"", "Dr", "Sir"} @@ -269,8 +312,7 @@ func BenchmarkEval(b *testing.B) { } } -func benchmarkJavascript(b *testing.B, cache bool) { - disableCache = !cache +func benchmarkJavascript(b *testing.B) { for i := 0; i < b.N; i++ { ret, err := javascript(nil, ` if (!title) { @@ -292,9 +334,53 @@ func benchmarkJavascript(b *testing.B, cache bool) { } func BenchmarkJavascriptWithNoCache(b *testing.B) { - benchmarkJavascript(b, false) + prepCachesForTest(noCache) + benchmarkJavascript(b) } func BenchmarkJavascriptWithCache(b *testing.B) { - benchmarkJavascript(b, true) + prepCachesForTest(withCache) + benchmarkJavascript(b) +} + +func concurrentBenchmarkJavascript(b *testing.B) { + concurrency := 200 + for i := 0; i < b.N; i++ { + wg := &sync.WaitGroup{} + wg.Add(concurrency) + for j := 0; j < concurrency; j++ { + index := i + go func() { + defer wg.Done() + ret, err := javascript(nil, ` + if (!title) { + "" + } else if (!name) { + "" + } else { + title + " " + name + }`, + "title:string", benchTitles[index%len(benchTitles)], + "name:string", benchNames[index%len(benchNames)]) + if err != nil { + b.FailNow() + } + if ret != benchResults[index%len(benchResults)] { + b.FailNow() + } + }() + } + wg.Wait() + } +} + +func BenchmarkConcurrentJavascriptWithNoCache(b *testing.B) { + prepCachesForTest(noCache) + concurrentBenchmarkJavascript(b) +} + +func BenchmarkConcurrentJavascriptWithCache(b *testing.B) { + prepCachesForTest(withCache) + concurrentBenchmarkJavascript(b) + b.Logf("pool size: %d", JSRuntimePool.GetNumActive()+JSRuntimePool.GetNumIdle()) } diff --git a/go.mod b/go.mod index 3c4c745..da42f09 100644 --- a/go.mod +++ b/go.mod @@ -9,10 +9,12 @@ require ( github.com/bradleyjkemp/cupaloy v2.3.0+incompatible github.com/dlclark/regexp2 v1.2.1 // indirect github.com/dop251/goja v0.0.0-20200912112403-81ddb8a7cc41 + github.com/fortytw2/leaktest v1.3.0 // indirect github.com/go-chi/chi v4.1.2+incompatible github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect github.com/google/uuid v1.1.2 github.com/jf-tech/go-corelib v0.0.4 + github.com/jolestar/go-commons-pool v2.0.0+incompatible github.com/spf13/cobra v1.0.0 github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/testify v1.6.1 diff --git a/go.sum b/go.sum index 4bdbd4e..9f47f64 100644 --- a/go.sum +++ b/go.sum @@ -32,6 +32,8 @@ github.com/dlclark/regexp2 v1.2.1 h1:Ff/S0snjr1oZHUNOkvA/gP6KUaMg5vDDl3Qnhjnwgm8 github.com/dlclark/regexp2 v1.2.1/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= github.com/dop251/goja v0.0.0-20200912112403-81ddb8a7cc41 h1:2P55x6IerzvQIv7bdKEQQWl93uIEQgh6417+uwHGtKQ= github.com/dop251/goja v0.0.0-20200912112403-81ddb8a7cc41/go.mod h1:Mw6PkjjMXWbTj+nnj4s3QPXq1jaT0s5pC0iFD4+BOAA= +github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= +github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-chi/chi v4.1.2+incompatible h1:fGFk2Gmi/YKXk0OmGfBh0WgmN3XB8lVnEyNz34tQRec= @@ -66,6 +68,8 @@ github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NH github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jf-tech/go-corelib v0.0.4 h1:XP5w5bumH/zR6/EZGzD4webeZ1BPU62xZvraAiyIqdc= github.com/jf-tech/go-corelib v0.0.4/go.mod h1:0+Fejzd53JtexKE5VI8I06WiBNATLIURRJgPrv4Yysg= +github.com/jolestar/go-commons-pool v2.0.0+incompatible h1:uHn5uRKsLLQSf9f1J5QPY2xREWx/YH+e4bIIXcAuAaE= +github.com/jolestar/go-commons-pool v2.0.0+incompatible/go.mod h1:ChJYIbIch0DMCSU6VU0t0xhPoWDR2mMFIQek3XWU0s8= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= From 207997572628e6a0c0700633de557f551efebedf Mon Sep 17 00:00:00 2001 From: jf-tech Date: Wed, 30 Sep 2020 18:03:20 +1300 Subject: [PATCH 2/6] add benchmark result to sample json --- samples/omniv2/json/json_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samples/omniv2/json/json_test.go b/samples/omniv2/json/json_test.go index 90df6f9..d742cc2 100644 --- a/samples/omniv2/json/json_test.go +++ b/samples/omniv2/json/json_test.go @@ -48,6 +48,8 @@ func init() { } } +// Benchmark2_Multiple_Objects-4 3112 390401 ns/op 98305 B/op 2343 allocs/op + func Benchmark2_Multiple_Objects(b *testing.B) { for i := 0; i < b.N; i++ { transform, err := benchSchema.NewTransform( From f706d120e161722041ce146708aad22de014157a Mon Sep 17 00:00:00 2001 From: jf-tech Date: Wed, 30 Sep 2020 19:22:50 +1300 Subject: [PATCH 3/6] Given we use MaxTotal = -1 (aka no limit) there is actually no need to set BlockWhenExhausted to false. --- customfuncs/javascript.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/customfuncs/javascript.go b/customfuncs/javascript.go index 73553c7..4572f9b 100644 --- a/customfuncs/javascript.go +++ b/customfuncs/javascript.go @@ -86,10 +86,6 @@ func resetCaches() { // keep a minimum 10 idling VMs always around to reduce startup latency. cfg.MinIdle = 10 cfg.TimeBetweenEvictionRuns = cfg.MinEvictableIdleTime // turn on eviction - // we don't ever want to block - we'd rather run slowly but let transform continue. So if - // pool is exhausted, just fail BorrowObject call and we'll create a new vm runtime (slowly) - // and continue. - cfg.BlockWhenExhausted = false return cfg }()) // per transform, plus expensive, a smaller cap. From 0549b04ea0e5787a6dadd127138de595c77a0948 Mon Sep 17 00:00:00 2001 From: jf-tech Date: Wed, 30 Sep 2020 21:35:05 +1300 Subject: [PATCH 4/6] remove use of NewObjectPool instead use sdk's sync.Pool --- customfuncs/javascript.go | 65 ++++++++++++-------------------- customfuncs/javascript_test.go | 41 +++----------------- go.mod | 2 - go.sum | 4 -- samples/omniv2/json/json_test.go | 3 +- 5 files changed, 33 insertions(+), 82 deletions(-) diff --git a/customfuncs/javascript.go b/customfuncs/javascript.go index 4572f9b..806113d 100644 --- a/customfuncs/javascript.go +++ b/customfuncs/javascript.go @@ -1,18 +1,17 @@ package customfuncs import ( - "context" "errors" "fmt" "strconv" "strings" + "sync" "unsafe" node "github.com/antchfx/xmlquery" "github.com/dop251/goja" "github.com/jf-tech/go-corelib/caches" "github.com/jf-tech/go-corelib/strs" - pool "github.com/jolestar/go-commons-pool" "github.com/jf-tech/omniparser/nodes" "github.com/jf-tech/omniparser/transformctx" @@ -66,48 +65,35 @@ func parseArgTypeAndValue(argDecl, argValue string) (name string, value interfac } } -// For debugging/testing purpose so we can easily disable all the caches. But not exported. We always -// want caching in production. -var disableCaching = false - -func resetCaches() { - // per schema so won't have too many, no need to put a small cap (default is 64k) - JSProgramCache = caches.NewLoadingCache() - JSRuntimePool = pool.NewObjectPool( - context.Background(), - pool.NewPooledObjectFactorySimple( - func(context.Context) (interface{}, error) { - return goja.New(), nil - }), - func() *pool.ObjectPoolConfig { - cfg := pool.NewDefaultPoolConfig() - cfg.MaxTotal = -1 // no limit - cfg.MaxIdle = -1 // no limit - // keep a minimum 10 idling VMs always around to reduce startup latency. - cfg.MinIdle = 10 - cfg.TimeBetweenEvictionRuns = cfg.MinEvictableIdleTime // turn on eviction - return cfg - }()) - // per transform, plus expensive, a smaller cap. - NodeToJSONCache = caches.NewLoadingCache(100) -} - // JSProgramCache caches *goja.Program. A *goja.Program is compiled javascript and it can be used // across multiple goroutines and across different *goja.Runtime. If default loading cache capacity // is not desirable, change JSProgramCache to a loading cache with a different capacity at package // init time. Be mindful this will be shared across all use cases inside your process. var JSProgramCache *caches.LoadingCache -// JSRuntimePool caches *goja.Runtime whose creation is expensive such that we want to have a pool +// jsRuntimePool caches *goja.Runtime whose creation is expensive such that we want to have a pool // of them to amortize the initialization cost. However, a *goja.Runtime cannot be used by two/more -// javascript's at the same time, thus the use of resource pool. If the default pool configuration is -// not desirable, change JSRuntimePool with a different config during package init time. Be mindful -// this will be shared across all use cases inside your process. -var JSRuntimePool *pool.ObjectPool +// javascript's at the same time, thus the use of sync.Pool. Not user customizable. +// var jsRuntimePool *resPool +var jsRuntimePool sync.Pool // NodeToJSONCache caches *node.Node to JSON translations. var NodeToJSONCache *caches.LoadingCache +// For debugging/testing purpose so we can easily disable all the caches. But not exported. We always +// want caching in production. +var disableCaching = false + +func resetCaches() { + JSProgramCache = caches.NewLoadingCache() + jsRuntimePool = sync.Pool{ + New: func() interface{} { + return goja.New() + }, + } + NodeToJSONCache = caches.NewLoadingCache() +} + func init() { resetCaches() } @@ -140,15 +126,12 @@ func getNodeJSON(n *node.Node) string { func execProgram(program *goja.Program, args map[string]interface{}) (goja.Value, error) { var vm *goja.Runtime + var poolObj interface{} if disableCaching { vm = goja.New() } else { - poolObj, err := JSRuntimePool.BorrowObject(context.Background()) - if err != nil { - vm = goja.New() - } else { - vm = poolObj.(*goja.Runtime) - } + poolObj = jsRuntimePool.Get() + vm = poolObj.(*goja.Runtime) } defer func() { if vm != nil { @@ -156,7 +139,9 @@ func execProgram(program *goja.Program, args map[string]interface{}) (goja.Value for arg := range args { vm.Set(arg, goja.Undefined()) } - _ = JSRuntimePool.ReturnObject(context.Background(), vm) + } + if poolObj != nil { + jsRuntimePool.Put(poolObj) } }() for arg, val := range args { diff --git a/customfuncs/javascript_test.go b/customfuncs/javascript_test.go index 2baaa9f..82be128 100644 --- a/customfuncs/javascript_test.go +++ b/customfuncs/javascript_test.go @@ -1,13 +1,10 @@ package customfuncs import ( - "context" - "errors" "strings" "sync" "testing" - pool "github.com/jolestar/go-commons-pool" "github.com/stretchr/testify/assert" "github.com/jf-tech/omniparser/nodes" @@ -209,26 +206,6 @@ func TestJavascript(t *testing.T) { } } -func TestJSRuntimePoolFailAndFallback(t *testing.T) { - prepCachesForTest(withCache) - JSRuntimePool = pool.NewObjectPoolWithDefaultConfig( - context.Background(), - pool.NewPooledObjectFactorySimple( - func(context.Context) (interface{}, error) { - return nil, errors.New("factory failure") - })) - assertPoolSize := func(size int) { - assert.Equal(t, size, JSRuntimePool.GetNumActive()+JSRuntimePool.GetNumIdle()) - } - assertPoolSize(0) - // The runtime factory will fail but javascript call will still succeed with - // an ad-hoc vm runtime created and no vm runtime added into the pool. - r, err := javascript(nil, "1 + 2") - assert.NoError(t, err) - assert.Equal(t, "3", r) - assertPoolSize(0) -} - func TestJavascriptClearVarsAfterRunProgram(t *testing.T) { prepCachesForTest(noCache) r, err := javascript(nil, `v1 + v2`, "v1:int", "1", "v2:int", "2") @@ -246,17 +223,12 @@ func TestJavascriptClearVarsAfterRunProgram(t *testing.T) { } // go test -bench=. -benchmem -benchtime=30s -// BenchmarkIfElse-4 218432557 156 ns/op 69 B/op 1 allocs/op -// BenchmarkEval-4 19546273 1876 ns/op 576 B/op 11 allocs/op -// BenchmarkJavascriptWithNoCache-4 164118 221029 ns/op 136686 B/op 1701 allocs/op -// BenchmarkJavascriptWithCache-4 13124720 2789 ns/op 224 B/op 11 allocs/op -// BenchmarkConcurrentJavascriptWithNoCache-4 1099 33752282 ns/op 27344894 B/op 340254 allocs/op -// BenchmarkConcurrentJavascriptWithCache-4 51764 698640 ns/op 44879 B/op 2335 allocs/op -// --- BENCH: BenchmarkConcurrentJavascriptWithCache-4 -// javascript_test.go:344: pool size: 4 -// javascript_test.go:344: pool size: 5 -// javascript_test.go:344: pool size: 47 -// javascript_test.go:344: pool size: 68 +// BenchmarkIfElse-4 236776694 152 ns/op 69 B/op 1 allocs/op +// BenchmarkEval-4 19235595 1872 ns/op 576 B/op 11 allocs/op +// BenchmarkJavascriptWithNoCache-4 171214 214641 ns/op 136674 B/op 1700 allocs/op +// BenchmarkJavascriptWithCache-4 18326389 1976 ns/op 193 B/op 10 allocs/op +// BenchmarkConcurrentJavascriptWithNoCache-4 1082 33534133 ns/op 27341317 B/op 340052 allocs/op +// BenchmarkConcurrentJavascriptWithCache-4 59512 564495 ns/op 39899 B/op 2152 allocs/op var ( benchTitles = []string{"", "Dr", "Sir"} @@ -382,5 +354,4 @@ func BenchmarkConcurrentJavascriptWithNoCache(b *testing.B) { func BenchmarkConcurrentJavascriptWithCache(b *testing.B) { prepCachesForTest(withCache) concurrentBenchmarkJavascript(b) - b.Logf("pool size: %d", JSRuntimePool.GetNumActive()+JSRuntimePool.GetNumIdle()) } diff --git a/go.mod b/go.mod index da42f09..3c4c745 100644 --- a/go.mod +++ b/go.mod @@ -9,12 +9,10 @@ require ( github.com/bradleyjkemp/cupaloy v2.3.0+incompatible github.com/dlclark/regexp2 v1.2.1 // indirect github.com/dop251/goja v0.0.0-20200912112403-81ddb8a7cc41 - github.com/fortytw2/leaktest v1.3.0 // indirect github.com/go-chi/chi v4.1.2+incompatible github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect github.com/google/uuid v1.1.2 github.com/jf-tech/go-corelib v0.0.4 - github.com/jolestar/go-commons-pool v2.0.0+incompatible github.com/spf13/cobra v1.0.0 github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/testify v1.6.1 diff --git a/go.sum b/go.sum index 9f47f64..4bdbd4e 100644 --- a/go.sum +++ b/go.sum @@ -32,8 +32,6 @@ github.com/dlclark/regexp2 v1.2.1 h1:Ff/S0snjr1oZHUNOkvA/gP6KUaMg5vDDl3Qnhjnwgm8 github.com/dlclark/regexp2 v1.2.1/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= github.com/dop251/goja v0.0.0-20200912112403-81ddb8a7cc41 h1:2P55x6IerzvQIv7bdKEQQWl93uIEQgh6417+uwHGtKQ= github.com/dop251/goja v0.0.0-20200912112403-81ddb8a7cc41/go.mod h1:Mw6PkjjMXWbTj+nnj4s3QPXq1jaT0s5pC0iFD4+BOAA= -github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= -github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-chi/chi v4.1.2+incompatible h1:fGFk2Gmi/YKXk0OmGfBh0WgmN3XB8lVnEyNz34tQRec= @@ -68,8 +66,6 @@ github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NH github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jf-tech/go-corelib v0.0.4 h1:XP5w5bumH/zR6/EZGzD4webeZ1BPU62xZvraAiyIqdc= github.com/jf-tech/go-corelib v0.0.4/go.mod h1:0+Fejzd53JtexKE5VI8I06WiBNATLIURRJgPrv4Yysg= -github.com/jolestar/go-commons-pool v2.0.0+incompatible h1:uHn5uRKsLLQSf9f1J5QPY2xREWx/YH+e4bIIXcAuAaE= -github.com/jolestar/go-commons-pool v2.0.0+incompatible/go.mod h1:ChJYIbIch0DMCSU6VU0t0xhPoWDR2mMFIQek3XWU0s8= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= diff --git a/samples/omniv2/json/json_test.go b/samples/omniv2/json/json_test.go index d742cc2..daf7835 100644 --- a/samples/omniv2/json/json_test.go +++ b/samples/omniv2/json/json_test.go @@ -48,7 +48,8 @@ func init() { } } -// Benchmark2_Multiple_Objects-4 3112 390401 ns/op 98305 B/op 2343 allocs/op +// go test -bench=. -benchmem -benchtime=30s +// Benchmark2_Multiple_Objects-4 95299 377800 ns/op 99312 B/op 2351 allocs/op func Benchmark2_Multiple_Objects(b *testing.B) { for i := 0; i < b.N; i++ { From 48f335e1a1097ab81e791d6988679ba749469eee Mon Sep 17 00:00:00 2001 From: jf-tech Date: Wed, 30 Sep 2020 22:22:59 +1300 Subject: [PATCH 5/6] a bit more memory alloc reduction and update benchmark results --- customfuncs/customFuncs.go | 7 +++---- customfuncs/javascript.go | 10 +++++----- customfuncs/javascript_test.go | 12 ++++++------ 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/customfuncs/customFuncs.go b/customfuncs/customFuncs.go index 2e01ace..cd46d5a 100644 --- a/customfuncs/customFuncs.go +++ b/customfuncs/customFuncs.go @@ -1,7 +1,6 @@ package customfuncs import ( - "bytes" "encoding/json" "fmt" "math" @@ -76,11 +75,11 @@ func coalesce(_ *transformctx.Ctx, strs ...string) (string, error) { } func concat(_ *transformctx.Ctx, strs ...string) (string, error) { - var b bytes.Buffer + var w strings.Builder for _, s := range strs { - b.WriteString(s) + w.WriteString(s) } - return b.String(), nil + return w.String(), nil } func containsPattern(_ *transformctx.Ctx, regexPattern string, strs ...string) (string, error) { diff --git a/customfuncs/javascript.go b/customfuncs/javascript.go index 806113d..7e7a178 100644 --- a/customfuncs/javascript.go +++ b/customfuncs/javascript.go @@ -29,17 +29,17 @@ const ( ) func parseArgTypeAndValue(argDecl, argValue string) (name string, value interface{}, err error) { - declParts := strings.Split(argDecl, ":") - if len(declParts) != 2 { + col := strings.IndexRune(argDecl, ':') + if col < 0 { return "", nil, fmt.Errorf( "arg decl must be in format of ':', instead got '%s'", argDecl) } - name = declParts[0] + name = argDecl[:col] if !strs.IsStrNonBlank(name) { return "", nil, fmt.Errorf( "arg_name in ':' cannot be a blank string, instead got '%s'", argDecl) } - switch declParts[1] { + switch argDecl[col+1:] { case argTypeString: return name, argValue, nil case argTypeInt: @@ -61,7 +61,7 @@ func parseArgTypeAndValue(argDecl, argValue string) (name string, value interfac } return name, b, nil default: - return "", nil, fmt.Errorf("arg_type '%s' in ':' is not supported", declParts[1]) + return "", nil, fmt.Errorf("arg_type '%s' in ':' is not supported", argDecl[col+1:]) } } diff --git a/customfuncs/javascript_test.go b/customfuncs/javascript_test.go index 82be128..db0c0c7 100644 --- a/customfuncs/javascript_test.go +++ b/customfuncs/javascript_test.go @@ -223,12 +223,12 @@ func TestJavascriptClearVarsAfterRunProgram(t *testing.T) { } // go test -bench=. -benchmem -benchtime=30s -// BenchmarkIfElse-4 236776694 152 ns/op 69 B/op 1 allocs/op -// BenchmarkEval-4 19235595 1872 ns/op 576 B/op 11 allocs/op -// BenchmarkJavascriptWithNoCache-4 171214 214641 ns/op 136674 B/op 1700 allocs/op -// BenchmarkJavascriptWithCache-4 18326389 1976 ns/op 193 B/op 10 allocs/op -// BenchmarkConcurrentJavascriptWithNoCache-4 1082 33534133 ns/op 27341317 B/op 340052 allocs/op -// BenchmarkConcurrentJavascriptWithCache-4 59512 564495 ns/op 39899 B/op 2152 allocs/op +// BenchmarkIfElse-4 368017143 98.1 ns/op 8 B/op 1 allocs/op +// BenchmarkEval-4 26409430 1386 ns/op 418 B/op 8 allocs/op +// BenchmarkJavascriptWithNoCache-4 172803 210958 ns/op 136608 B/op 1698 allocs/op +// BenchmarkJavascriptWithCache-4 23059004 1572 ns/op 129 B/op 8 allocs/op +// BenchmarkConcurrentJavascriptWithNoCache-4 1140 32729941 ns/op 27328924 B/op 339654 allocs/op +// BenchmarkConcurrentJavascriptWithCache-4 70977 504870 ns/op 26568 B/op 1745 allocs/op var ( benchTitles = []string{"", "Dr", "Sir"} From 33caf5dc9989119aaf81cc0ec67211d7edaef438 Mon Sep 17 00:00:00 2001 From: jf-tech Date: Thu, 1 Oct 2020 04:38:29 +1300 Subject: [PATCH 6/6] PR comment --- customfuncs/javascript.go | 1 - 1 file changed, 1 deletion(-) diff --git a/customfuncs/javascript.go b/customfuncs/javascript.go index 7e7a178..95c55b6 100644 --- a/customfuncs/javascript.go +++ b/customfuncs/javascript.go @@ -74,7 +74,6 @@ var JSProgramCache *caches.LoadingCache // jsRuntimePool caches *goja.Runtime whose creation is expensive such that we want to have a pool // of them to amortize the initialization cost. However, a *goja.Runtime cannot be used by two/more // javascript's at the same time, thus the use of sync.Pool. Not user customizable. -// var jsRuntimePool *resPool var jsRuntimePool sync.Pool // NodeToJSONCache caches *node.Node to JSON translations.