Skip to content

Commit

Permalink
Improve fast regexp matcher cache (#482)
Browse files Browse the repository at this point in the history
* Limit FastRegexMatcher by size (bytes) and add a TTL to each entry

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Add TestNewFastRegexMatcher_CacheSizeLimit

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Tolerate ristretto goroutines when checking goroutine leaks

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Tolerate ristretto goroutines when checking goroutine leaks

Signed-off-by: Marco Pracucci <marco@pracucci.com>

---------

Signed-off-by: Marco Pracucci <marco@pracucci.com>
  • Loading branch information
pracucci committed Apr 17, 2023
1 parent 8ef48ad commit c461e22
Show file tree
Hide file tree
Showing 11 changed files with 111 additions and 22 deletions.
4 changes: 3 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ require (
github.com/Azure/azure-sdk-for-go v65.0.0+incompatible
github.com/Azure/go-autorest/autorest v0.11.28
github.com/Azure/go-autorest/autorest/adal v0.9.22
github.com/DmitriyVTitov/size v1.5.0
github.com/alecthomas/kingpin/v2 v2.3.2
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137
github.com/aws/aws-sdk-go v1.44.217
github.com/cespare/xxhash/v2 v2.2.0
github.com/dennwc/varint v1.0.0
github.com/dgraph-io/ristretto v0.1.1
github.com/digitalocean/godo v1.98.0
github.com/docker/docker v23.0.1+incompatible
github.com/edsrzf/mmap-go v1.1.0
Expand All @@ -28,7 +30,6 @@ require (
github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd
github.com/grpc-ecosystem/grpc-gateway v1.16.0
github.com/hashicorp/consul/api v1.20.0
github.com/hashicorp/golang-lru/v2 v2.0.2
github.com/hashicorp/nomad/api v0.0.0-20230308192510-48e7d70fcd4b
github.com/hetznercloud/hcloud-go v1.41.0
github.com/ionos-cloud/sdk-go/v6 v6.1.5
Expand Down Expand Up @@ -84,6 +85,7 @@ require (
require (
cloud.google.com/go/compute/metadata v0.2.3 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
github.com/dustin/go-humanize v1.0.0 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/rogpeppe/go-internal v1.9.0 // indirect
Expand Down
11 changes: 9 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBp
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
github.com/DmitriyVTitov/size v1.5.0 h1:/PzqxYrOyOUX1BXj6J9OuVRVGe+66VL4D9FlUaW515g=
github.com/DmitriyVTitov/size v1.5.0/go.mod h1:le6rNI4CoLQV1b9gzp1+3d7hMAD/uu2QcJ+aYbNgiU0=
github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0=
github.com/Microsoft/go-winio v0.6.0 h1:slsWYD/zyx7lCXoZVlvQrj0hPTM1HI4+v1sIda2yDvg=
github.com/Microsoft/go-winio v0.6.0/go.mod h1:cTAf44im0RAYeL23bpB+fzCyDH2MJiz2BO69KH/soAE=
Expand Down Expand Up @@ -148,7 +150,11 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE=
github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA=
github.com/dgraph-io/ristretto v0.1.1 h1:6CWw5tJNgpegArSHpNHJKldNeq03FQCwYvfMVWajOK8=
github.com/dgraph-io/ristretto v0.1.1/go.mod h1:S1GPSBCYCIhmVNfcth17y2zZtQT6wzkzgwUve0VDWWA=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/digitalocean/godo v1.98.0 h1:potyC1eD0N9n5/P4/WmJuKgg+OGYZOBWEW+/aKTX6QQ=
github.com/digitalocean/godo v1.98.0/go.mod h1:NRpFznZFvhHjBoqZAaOD3khVzsJ3EibzKqFL4R60dmA=
github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI=
Expand All @@ -162,6 +168,8 @@ github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs=
github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU=
github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I=
Expand Down Expand Up @@ -444,8 +452,6 @@ github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.6.0 h1:uL2shRDx7RTrOrTCUZEGP/wJUFiUI8QT6E7z5o8jga4=
github.com/hashicorp/golang-lru v0.6.0/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/hashicorp/golang-lru/v2 v2.0.2 h1:Dwmkdr5Nc/oBiXgJS3CDHNhJtIHkuZ3DZF5twqnfBdU=
github.com/hashicorp/golang-lru/v2 v2.0.2/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=
github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ=
github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc=
Expand Down Expand Up @@ -1001,6 +1007,7 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20221010170243-090e33056c14/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
Expand Down
24 changes: 17 additions & 7 deletions model/labels/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ package labels

import (
"strings"
"time"

"github.com/DmitriyVTitov/size"
"github.com/dgraph-io/ristretto"
"github.com/grafana/regexp"
"github.com/grafana/regexp/syntax"
lru "github.com/hashicorp/golang-lru/v2"
)

const (
Expand All @@ -29,14 +31,22 @@ const (
// to match values instead of iterating over a list. This value has
// been computed running BenchmarkOptimizeEqualStringMatchers.
minEqualMultiStringMatcherMapThreshold = 16

fastRegexMatcherCacheMaxSizeBytes = 1024 * 1024 * 1024 // 1GB
fastRegexMatcherCacheTTL = 5 * time.Minute
)

var fastRegexMatcherCache *lru.Cache[string, *FastRegexMatcher]
var fastRegexMatcherCache *ristretto.Cache

func init() {
// Ignore error because it can only return error if size is invalid,
// but we're using an hardcoded size here.
fastRegexMatcherCache, _ = lru.New[string, *FastRegexMatcher](10000)
// Ignore error because it can only return error if config is invalid,
// but we're using an hardcoded static config here.
fastRegexMatcherCache, _ = ristretto.NewCache(&ristretto.Config{
NumCounters: 100_000, // 10x the max number of expected items (takes 3 bytes per counter),
MaxCost: fastRegexMatcherCacheMaxSizeBytes,
BufferItems: 64, // Recommended default per the Config docs,
Metrics: false,
})
}

type FastRegexMatcher struct {
Expand All @@ -58,7 +68,7 @@ type FastRegexMatcher struct {
func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
// Check the cache.
if matcher, ok := fastRegexMatcherCache.Get(v); ok {
return matcher, nil
return matcher.(*FastRegexMatcher), nil
}

// Create a new matcher.
Expand All @@ -68,7 +78,7 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
}

// Cache it.
fastRegexMatcherCache.Add(v, matcher)
fastRegexMatcherCache.SetWithTTL(v, matcher, int64(size.Of(matcher)), fastRegexMatcherCacheTTL)

return matcher, nil
}
Expand Down
63 changes: 62 additions & 1 deletion model/labels/regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"testing"
"time"

"github.com/DmitriyVTitov/size"
"github.com/grafana/regexp"
"github.com/grafana/regexp/syntax"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -98,6 +99,66 @@ func TestNewFastRegexMatcher(t *testing.T) {
}
}

func TestNewFastRegexMatcher_CacheSizeLimit(t *testing.T) {
// Start with an empty cache.
fastRegexMatcherCache.Clear()

// Init the random seed with a constant, so that it doesn't change between runs.
randGenerator := rand.New(rand.NewSource(1))

// Generate a very expensive regex.
alternates := make([]string, 1000)
for i := 0; i < len(alternates); i++ {
alternates[i] = randString(randGenerator, 100) + fmt.Sprintf(".%d", i)
}
expensiveRegexp := strings.Join(alternates, "|")

// Utility function to get a unique expensive regexp.
getExpensiveRegexp := func(id int) string {
return expensiveRegexp + fmt.Sprintf("%d", id)
}

// Estimate the size of the matcher with the expensive regexp.
m, err := newFastRegexMatcherWithoutCache(expensiveRegexp)
require.NoError(t, err)
expensiveRegexpSizeBytes := size.Of(m)
t.Logf("expensive regexp estimated size (bytes): %d", expensiveRegexpSizeBytes)

// Estimate the max number of items in the cache.
estimatedMaxItemsInCache := fastRegexMatcherCacheMaxSizeBytes / expensiveRegexpSizeBytes

// Fill the cache.
for i := 0; i < estimatedMaxItemsInCache; i++ {
_, err := NewFastRegexMatcher(getExpensiveRegexp(i))
require.NoError(t, err)
}

// Ensure all regexp matchers are still in the cache.
fastRegexMatcherCache.Wait()

for i := 0; i < estimatedMaxItemsInCache; i++ {
_, ok := fastRegexMatcherCache.Get(getExpensiveRegexp(i))
require.True(t, ok, "checking if regexp matcher #%d is still in the cache", i)
}

// Add one more regexp matcher to the cache.
_, err = NewFastRegexMatcher(getExpensiveRegexp(estimatedMaxItemsInCache + 1))
require.NoError(t, err)

// Ensure one item has been evicted from the cache to make room for the new entry.
fastRegexMatcherCache.Wait()

numEvicted := 0
for i := 0; i < estimatedMaxItemsInCache; i++ {
if _, ok := fastRegexMatcherCache.Get(getExpensiveRegexp(i)); !ok {
t.Logf("the regexp matcher #%d has been evicted from the cache", i)
numEvicted++
}
}

require.Equal(t, 1, numEvicted)
}

func BenchmarkNewFastRegexMatcher(b *testing.B) {
runBenchmark := func(newFunc func(v string) (*FastRegexMatcher, error)) func(b *testing.B) {
return func(b *testing.B) {
Expand Down Expand Up @@ -130,7 +191,7 @@ func BenchmarkNewFastRegexMatcher_CacheMisses(b *testing.B) {
for testName, regexpPrefix := range tests {
b.Run(testName, func(b *testing.B) {
// Ensure the cache is empty.
fastRegexMatcherCache.Purge()
fastRegexMatcherCache.Clear()

b.ResetTimer()

Expand Down
4 changes: 2 additions & 2 deletions promql/engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ import (
"github.com/go-kit/log"

"github.com/prometheus/prometheus/tsdb/tsdbutil"
"github.com/prometheus/prometheus/util/testutil"

"github.com/stretchr/testify/require"
"go.uber.org/goleak"

"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
Expand All @@ -39,7 +39,7 @@ import (
)

func TestMain(m *testing.M) {
goleak.VerifyTestMain(m)
testutil.TolerantVerifyLeak(m)
}

func TestQueryConcurrency(t *testing.T) {
Expand Down
4 changes: 2 additions & 2 deletions rules/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import (
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"go.uber.org/atomic"
"go.uber.org/goleak"
"gopkg.in/yaml.v2"

"github.com/prometheus/prometheus/model/labels"
Expand All @@ -42,10 +41,11 @@ import (
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/prometheus/prometheus/tsdb/tsdbutil"
"github.com/prometheus/prometheus/util/teststorage"
"github.com/prometheus/prometheus/util/testutil"
)

func TestMain(m *testing.M) {
goleak.VerifyTestMain(m)
testutil.TolerantVerifyLeak(m)
}

func TestAlertingRule(t *testing.T) {
Expand Down
9 changes: 8 additions & 1 deletion tsdb/db_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,14 @@ func TestMain(m *testing.M) {
flag.Parse()
defaultIsolationDisabled = !isolationEnabled

goleak.VerifyTestMain(m, goleak.IgnoreTopFunction("github.com/prometheus/prometheus/tsdb.(*SegmentWAL).cut.func1"), goleak.IgnoreTopFunction("github.com/prometheus/prometheus/tsdb.(*SegmentWAL).cut.func2"))
goleak.VerifyTestMain(m,
goleak.IgnoreTopFunction("github.com/prometheus/prometheus/tsdb.(*SegmentWAL).cut.func1"),
goleak.IgnoreTopFunction("github.com/prometheus/prometheus/tsdb.(*SegmentWAL).cut.func2"),
// Ignore "ristretto" and its dependency "glog".
goleak.IgnoreTopFunction("github.com/dgraph-io/ristretto.(*defaultPolicy).processItems"),
goleak.IgnoreTopFunction("github.com/dgraph-io/ristretto.(*Cache).processItems"),
goleak.IgnoreTopFunction("github.com/golang/glog.(*loggingT).flushDaemon"),
)
}

func openTestDB(t testing.TB, opts *Options, rngs []int64) (db *DB) {
Expand Down
3 changes: 1 addition & 2 deletions tsdb/index/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (

"github.com/pkg/errors"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"

"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/storage"
Expand All @@ -37,7 +36,7 @@ import (
)

func TestMain(m *testing.M) {
goleak.VerifyTestMain(m)
testutil.TolerantVerifyLeak(m)
}

type series struct {
Expand Down
4 changes: 2 additions & 2 deletions tsdb/tombstones/tombstones_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ import (

"github.com/go-kit/log"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"

"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/util/testutil"
)

func TestMain(m *testing.M) {
goleak.VerifyTestMain(m)
testutil.TolerantVerifyLeak(m)
}

func TestWriteAndReadbackTombstones(t *testing.T) {
Expand Down
3 changes: 1 addition & 2 deletions tsdb/wlog/wlog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,13 @@ import (

client_testutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"

"github.com/prometheus/prometheus/tsdb/fileutil"
"github.com/prometheus/prometheus/util/testutil"
)

func TestMain(m *testing.M) {
goleak.VerifyTestMain(m)
testutil.TolerantVerifyLeak(m)
}

// TestWALRepair_ReadingError ensures that a repair is run for an error
Expand Down
4 changes: 4 additions & 0 deletions util/testutil/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,9 @@ func TolerantVerifyLeak(m *testing.M) {
// positives.
// https://github.com/kubernetes/client-go/blob/f6ce18ae578c8cca64d14ab9687824d9e1305a67/util/workqueue/queue.go#L201
goleak.IgnoreTopFunction("k8s.io/client-go/util/workqueue.(*Type).updateUnfinishedWorkLoop"),
// Ignore "ristretto" and its dependency "glog".
goleak.IgnoreTopFunction("github.com/dgraph-io/ristretto.(*defaultPolicy).processItems"),
goleak.IgnoreTopFunction("github.com/dgraph-io/ristretto.(*Cache).processItems"),
goleak.IgnoreTopFunction("github.com/golang/glog.(*loggingT).flushDaemon"),
)
}

0 comments on commit c461e22

Please sign in to comment.