From bdd0b8522b79a4747ce7a33146a7b2727f7ed134 Mon Sep 17 00:00:00 2001 From: Jon Kartago Lamida Date: Tue, 18 Apr 2023 08:22:48 +0800 Subject: [PATCH] Improve memory limit on the in-memory cache used for regular expression matchers (#4751) (#4758) * Improve memory limit on the in-memory cache used for regular expression matchers Signed-off-by: Marco Pracucci * Fixed CHANGELOG Signed-off-by: Marco Pracucci * Ignore ristretto.Cache goroutines Signed-off-by: Marco Pracucci * Fix unit tests Signed-off-by: Marco Pracucci --------- Signed-off-by: Marco Pracucci (cherry picked from commit 020e69d80bfd5ca5f6dfddc9fe341f7aa027f8c9) Co-authored-by: Marco Pracucci --- CHANGELOG.md | 1 + go.mod | 5 +- go.sum | 13 +- .../indexheader/index/symbols_test.go | 4 +- pkg/storegateway/postings_codec_test.go | 10 +- pkg/util/test/leak.go | 6 + .../github.com/DmitriyVTitov/size/.gitignore | 19 + vendor/github.com/DmitriyVTitov/size/LICENSE | 21 + .../github.com/DmitriyVTitov/size/README.md | 48 + vendor/github.com/DmitriyVTitov/size/size.go | 142 ++ .../dgraph-io/ristretto/.deepsource.toml | 17 + .../dgraph-io/ristretto/.go-version | 1 + .../dgraph-io/ristretto/.golangci.yml | 23 + .../dgraph-io/ristretto/CHANGELOG.md | 187 +++ vendor/github.com/dgraph-io/ristretto/LICENSE | 176 +++ .../github.com/dgraph-io/ristretto/README.md | 220 +++ .../github.com/dgraph-io/ristretto/cache.go | 719 ++++++++++ .../github.com/dgraph-io/ristretto/policy.go | 423 ++++++ vendor/github.com/dgraph-io/ristretto/ring.go | 91 ++ .../github.com/dgraph-io/ristretto/sketch.go | 156 +++ .../github.com/dgraph-io/ristretto/store.go | 242 ++++ vendor/github.com/dgraph-io/ristretto/ttl.go | 147 ++ .../github.com/dgraph-io/ristretto/z/LICENSE | 64 + .../dgraph-io/ristretto/z/README.md | 129 ++ .../dgraph-io/ristretto/z/allocator.go | 403 ++++++ .../dgraph-io/ristretto/z/bbloom.go | 211 +++ .../github.com/dgraph-io/ristretto/z/btree.go | 710 ++++++++++ .../dgraph-io/ristretto/z/buffer.go | 544 ++++++++ 
.../dgraph-io/ristretto/z/calloc.go | 42 + .../dgraph-io/ristretto/z/calloc_32bit.go | 14 + .../dgraph-io/ristretto/z/calloc_64bit.go | 14 + .../dgraph-io/ristretto/z/calloc_jemalloc.go | 172 +++ .../ristretto/z/calloc_nojemalloc.go | 37 + .../github.com/dgraph-io/ristretto/z/file.go | 217 +++ .../dgraph-io/ristretto/z/file_default.go | 39 + .../dgraph-io/ristretto/z/file_linux.go | 37 + .../github.com/dgraph-io/ristretto/z/flags.go | 311 +++++ .../dgraph-io/ristretto/z/histogram.go | 205 +++ .../github.com/dgraph-io/ristretto/z/mmap.go | 44 + .../dgraph-io/ristretto/z/mmap_darwin.go | 59 + .../dgraph-io/ristretto/z/mmap_linux.go | 71 + .../dgraph-io/ristretto/z/mmap_plan9.go | 44 + .../dgraph-io/ristretto/z/mmap_unix.go | 55 + .../dgraph-io/ristretto/z/mmap_windows.go | 96 ++ .../dgraph-io/ristretto/z/mremap_linux.go | 56 + .../ristretto/z/mremap_linux_arm64.go | 52 + .../dgraph-io/ristretto/z/rtutil.go | 75 ++ .../github.com/dgraph-io/ristretto/z/rtutil.s | 0 .../dgraph-io/ristretto/z/simd/baseline.go | 127 ++ .../dgraph-io/ristretto/z/simd/search.go | 51 + .../dgraph-io/ristretto/z/simd/search_amd64.s | 60 + .../ristretto/z/simd/stub_search_amd64.go | 6 + vendor/github.com/dgraph-io/ristretto/z/z.go | 151 +++ vendor/github.com/golang/glog/LICENSE | 191 +++ vendor/github.com/golang/glog/README.md | 36 + vendor/github.com/golang/glog/glog.go | 1180 +++++++++++++++++ vendor/github.com/golang/glog/glog_file.go | 124 ++ .../prometheus/model/labels/regexp.go | 24 +- .../prometheus/util/testutil/testing.go | 4 + vendor/modules.txt | 15 +- 60 files changed, 8326 insertions(+), 15 deletions(-) create mode 100644 vendor/github.com/DmitriyVTitov/size/.gitignore create mode 100644 vendor/github.com/DmitriyVTitov/size/LICENSE create mode 100644 vendor/github.com/DmitriyVTitov/size/README.md create mode 100644 vendor/github.com/DmitriyVTitov/size/size.go create mode 100644 vendor/github.com/dgraph-io/ristretto/.deepsource.toml create mode 100644 
vendor/github.com/dgraph-io/ristretto/.go-version create mode 100644 vendor/github.com/dgraph-io/ristretto/.golangci.yml create mode 100644 vendor/github.com/dgraph-io/ristretto/CHANGELOG.md create mode 100644 vendor/github.com/dgraph-io/ristretto/LICENSE create mode 100644 vendor/github.com/dgraph-io/ristretto/README.md create mode 100644 vendor/github.com/dgraph-io/ristretto/cache.go create mode 100644 vendor/github.com/dgraph-io/ristretto/policy.go create mode 100644 vendor/github.com/dgraph-io/ristretto/ring.go create mode 100644 vendor/github.com/dgraph-io/ristretto/sketch.go create mode 100644 vendor/github.com/dgraph-io/ristretto/store.go create mode 100644 vendor/github.com/dgraph-io/ristretto/ttl.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/LICENSE create mode 100644 vendor/github.com/dgraph-io/ristretto/z/README.md create mode 100644 vendor/github.com/dgraph-io/ristretto/z/allocator.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/bbloom.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/btree.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/buffer.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/calloc.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/calloc_32bit.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/calloc_64bit.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/calloc_jemalloc.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/calloc_nojemalloc.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/file.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/file_default.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/file_linux.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/flags.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/histogram.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/mmap.go create mode 100644 
vendor/github.com/dgraph-io/ristretto/z/mmap_darwin.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/mmap_linux.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/mmap_plan9.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/mmap_unix.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/mmap_windows.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/mremap_linux.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/mremap_linux_arm64.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/rtutil.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/rtutil.s create mode 100644 vendor/github.com/dgraph-io/ristretto/z/simd/baseline.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/simd/search.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/simd/search_amd64.s create mode 100644 vendor/github.com/dgraph-io/ristretto/z/simd/stub_search_amd64.go create mode 100644 vendor/github.com/dgraph-io/ristretto/z/z.go create mode 100644 vendor/github.com/golang/glog/LICENSE create mode 100644 vendor/github.com/golang/glog/README.md create mode 100644 vendor/github.com/golang/glog/glog.go create mode 100644 vendor/github.com/golang/glog/glog_file.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 7bc7b1fdc9..d3c30bc554 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -87,6 +87,7 @@ * [ENHANCEMENT] Ingester: improve performance when Active Series Tracker is in use. #4717 * [ENHANCEMENT] Store-gateway: optionally select `-blocks-storage.bucket-store.series-selection-strategy`, which can limit the impact of large posting lists (when many series share the same label name and value). #4667 #4695 #4698 * [ENHANCEMENT] Querier: Cache the converted float histogram from chunk iterator, hence there is no need to lookup chunk every time to get the converted float histogram. #4684 +* [ENHANCEMENT] Improved memory limit on the in-memory cache used for regular expression matchers. 
#4751 * [BUGFIX] Querier: Streaming remote read will now continue to return multiple chunks per frame after the first frame. #4423 * [BUGFIX] Store-gateway: the values for `stage="processed"` for the metrics `cortex_bucket_store_series_data_touched` and `cortex_bucket_store_series_data_size_touched_bytes` when using fine-grained chunks caching is now reporting the correct values of chunks held in memory. #4449 * [BUGFIX] Compactor: fixed reporting a compaction error when compactor is correctly shut down while populating blocks. #4580 diff --git a/go.mod b/go.mod index 161900e0b8..527b9e53e3 100644 --- a/go.mod +++ b/go.mod @@ -100,6 +100,7 @@ require ( cloud.google.com/go/compute v1.18.0 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect cloud.google.com/go/iam v0.12.0 // indirect + github.com/DmitriyVTitov/size v1.5.0 // indirect github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect github.com/armon/go-metrics v0.4.1 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect @@ -124,6 +125,7 @@ require ( github.com/coreos/go-semver v0.3.0 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/dgraph-io/ristretto v0.1.1 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/dlclark/regexp2 v1.4.0 // indirect github.com/dnaeon/go-vcr v1.2.0 // indirect @@ -150,6 +152,7 @@ require ( github.com/gofrs/uuid v4.3.1+incompatible // indirect github.com/gogo/googleapis v1.4.1 // indirect github.com/golang-jwt/jwt/v4 v4.5.0 // indirect + github.com/golang/glog v1.0.0 // indirect github.com/google/btree v1.0.1 // indirect github.com/google/gnostic v0.6.9 // indirect github.com/google/go-querystring v1.1.0 // indirect @@ -241,7 +244,7 @@ require ( ) // Using a fork of Prometheus with Mimir-specific changes. 
-replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20230413082406-8ef48ad9a7f0 +replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20230417132058-c461e223418b // Replace memberlist with our fork which includes some fixes that haven't been // merged upstream yet: diff --git a/go.sum b/go.sum index 28ec3da0d7..f75af07f69 100644 --- a/go.sum +++ b/go.sum @@ -92,6 +92,8 @@ github.com/AzureAD/microsoft-authentication-library-for-go v0.8.1/go.mod h1:4qFo github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= +github.com/DmitriyVTitov/size v1.5.0 h1:/PzqxYrOyOUX1BXj6J9OuVRVGe+66VL4D9FlUaW515g= +github.com/DmitriyVTitov/size v1.5.0/go.mod h1:le6rNI4CoLQV1b9gzp1+3d7hMAD/uu2QcJ+aYbNgiU0= github.com/HdrHistogram/hdrhistogram-go v1.1.2 h1:5IcZpTvzydCQeHzK4Ef/D5rrSqwxob0t8PQPMybUNFM= github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= github.com/Microsoft/go-winio v0.4.14/go.mod h1:qXqCSQ3Xa7+6tgxaGTIe4Kpcdsi+P8jBhyzoq1bpyYA= @@ -216,6 +218,10 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE= github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA= +github.com/dgraph-io/ristretto v0.1.1 h1:6CWw5tJNgpegArSHpNHJKldNeq03FQCwYvfMVWajOK8= +github.com/dgraph-io/ristretto v0.1.1/go.mod h1:S1GPSBCYCIhmVNfcth17y2zZtQT6wzkzgwUve0VDWWA= +github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA= 
+github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/digitalocean/godo v1.98.0 h1:potyC1eD0N9n5/P4/WmJuKgg+OGYZOBWEW+/aKTX6QQ= @@ -389,6 +395,8 @@ github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOW github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/glog v1.0.0 h1:nfP3RFugxnNRyKgeWd4oI1nYvXpxrx8ck8ZrcizshdQ= +github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -521,8 +529,8 @@ github.com/grafana/gomemcache v0.0.0-20230316202710-a081dae0aba9 h1:WB3bGH2f1UN6 github.com/grafana/gomemcache v0.0.0-20230316202710-a081dae0aba9/go.mod h1:PGk3RjYHpxMM8HFPhKKo+vve3DdlPUELZLSDEFehPuU= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe h1:yIXAAbLswn7VNWBIvM71O2QsgfgW9fRXZNR0DXe6pDU= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= -github.com/grafana/mimir-prometheus v0.0.0-20230413082406-8ef48ad9a7f0 h1:QJXGRJ8CHrVH7vcQziGknvDVHcPIJb0CkuR3R7oRUKo= 
-github.com/grafana/mimir-prometheus v0.0.0-20230413082406-8ef48ad9a7f0/go.mod h1:Zr54urUcu17EERTlF3xrU6OSmYEjfwzdBJWNIfIP+vk= +github.com/grafana/mimir-prometheus v0.0.0-20230417132058-c461e223418b h1:El+0EImX0ZSFKzy0NnvuFILGv3khlEnJmRSK58p5F9M= +github.com/grafana/mimir-prometheus v0.0.0-20230417132058-c461e223418b/go.mod h1:nOnMC6vOTtyXwYaSWAUtfqtdlxfxkQvW7niVgJqcJxY= github.com/grafana/regexp v0.0.0-20221005093135-b4c2bcb0a4b6 h1:A3dhViTeFDSQcGOXuUi6ukCQSMyDtDISBp2z6OOo2YM= github.com/grafana/regexp v0.0.0-20221005093135-b4c2bcb0a4b6/go.mod h1:M5qHK+eWfAv8VR/265dIuEpL3fNfeC21tXXp9itM24A= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= @@ -1267,6 +1275,7 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20221010170243-090e33056c14/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/pkg/storegateway/indexheader/index/symbols_test.go b/pkg/storegateway/indexheader/index/symbols_test.go index 187b9c8869..8c70f621bf 100644 --- a/pkg/storegateway/indexheader/index/symbols_test.go +++ b/pkg/storegateway/indexheader/index/symbols_test.go @@ -16,13 +16,13 @@ import ( "github.com/prometheus/prometheus/tsdb/encoding" "github.com/prometheus/prometheus/tsdb/index" "github.com/stretchr/testify/require" - "go.uber.org/goleak" streamencoding "github.com/grafana/mimir/pkg/storegateway/indexheader/encoding" 
+ "github.com/grafana/mimir/pkg/util/test" ) func TestMain(m *testing.M) { - goleak.VerifyTestMain(m) + test.VerifyNoLeakTestMain(m) } func TestSymbols(t *testing.T) { diff --git a/pkg/storegateway/postings_codec_test.go b/pkg/storegateway/postings_codec_test.go index 4a5300e312..fccf032032 100644 --- a/pkg/storegateway/postings_codec_test.go +++ b/pkg/storegateway/postings_codec_test.go @@ -21,6 +21,7 @@ import ( "github.com/prometheus/prometheus/tsdb" "github.com/prometheus/prometheus/tsdb/index" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/grafana/mimir/pkg/storegateway/indexcache" ) @@ -184,8 +185,15 @@ func TestDiffVarintMatchersCodec(t *testing.T) { decodedPostings, decodedMatchers, err := diffVarintSnappyMatchersDecode(data) assert.NoError(t, err) + if assert.Len(t, decodedMatchers, len(matchers)) && len(matchers) > 0 { - assert.Equal(t, matchers, decodedMatchers) + // Assert same matchers. We do some optimizations in mimir-prometheus which make + // the label matchers not comparable with reflect.DeepEqual() so we're going to + // compare their string representation. + require.Len(t, decodedMatchers, len(matchers)) + for i := 0; i < len(matchers); i++ { + assert.Equal(t, matchers[i].String(), decodedMatchers[i].String()) + } } p.reset() diff --git a/pkg/util/test/leak.go b/pkg/util/test/leak.go index a2a07d89d9..31f8c4798a 100644 --- a/pkg/util/test/leak.go +++ b/pkg/util/test/leak.go @@ -28,5 +28,11 @@ func goLeakOptions() []goleak.Option { // it gets closed when we close the BucketStore. However, we currently don't close BucketStore // on store-gateway termination so it never gets terminated. goleak.IgnoreTopFunction("github.com/grafana/mimir/pkg/storegateway/indexheader.NewReaderPool.func1"), + + // The FastRegexMatcher uses a global instance of ristretto.Cache which is never stopped, + // so we ignore its goroutines and the ones from glog which is a ristretto dependency.
+ goleak.IgnoreTopFunction("github.com/dgraph-io/ristretto.(*defaultPolicy).processItems"), + goleak.IgnoreTopFunction("github.com/dgraph-io/ristretto.(*Cache).processItems"), + goleak.IgnoreTopFunction("github.com/golang/glog.(*loggingT).flushDaemon"), } } diff --git a/vendor/github.com/DmitriyVTitov/size/.gitignore b/vendor/github.com/DmitriyVTitov/size/.gitignore new file mode 100644 index 0000000000..ae4c566d28 --- /dev/null +++ b/vendor/github.com/DmitriyVTitov/size/.gitignore @@ -0,0 +1,19 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +example +.idea +go.sum \ No newline at end of file diff --git a/vendor/github.com/DmitriyVTitov/size/LICENSE b/vendor/github.com/DmitriyVTitov/size/LICENSE new file mode 100644 index 0000000000..507b0b74cb --- /dev/null +++ b/vendor/github.com/DmitriyVTitov/size/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Dmitriy Titov (Дмитрий Титов) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/DmitriyVTitov/size/README.md b/vendor/github.com/DmitriyVTitov/size/README.md new file mode 100644 index 0000000000..215a749cc0 --- /dev/null +++ b/vendor/github.com/DmitriyVTitov/size/README.md @@ -0,0 +1,48 @@ +# size - calculates variable's memory consumption at runtime + +### Part of the [Transflow Project](http://transflow.ru/) + +Sometimes you may need a tool to measure the size of object in your Go program at runtime. This package makes an attempt to do so. Package based on `binary.Size()` from Go standard library. + +Features: +- supports non-fixed size variables and struct fields: `struct`, `int`, `slice`, `string`, `map`; +- supports complex types including structs with non-fixed size fields; +- supports all basic types (numbers, bool); +- supports `chan` and `interface`; +- supports pointers; +- implements infinite recursion detection (i.e. pointer inside struct field references to parent struct). + +### Usage example + +``` +package main + +import ( + "fmt" + + // Use latest tag. 
+ "github.com/DmitriyVTitov/size" +) + +func main() { + a := struct { + a int + b string + c bool + d int32 + e []byte + f [3]int64 + }{ + a: 10, // 8 bytes + b: "Text", // 16 (string itself) + 4 = 20 bytes + c: true, // 1 byte + d: 25, // 4 bytes + e: []byte{'c', 'd', 'e'}, // 24 (slice itself) + 3 = 27 bytes + f: [3]int64{1, 2, 3}, // 3 * 8 = 24 bytes + } // 84 + 3 (padding) = 87 bytes + + fmt.Println(size.Of(a)) +} + +// Output: 87 +``` diff --git a/vendor/github.com/DmitriyVTitov/size/size.go b/vendor/github.com/DmitriyVTitov/size/size.go new file mode 100644 index 0000000000..b52e2c41d4 --- /dev/null +++ b/vendor/github.com/DmitriyVTitov/size/size.go @@ -0,0 +1,142 @@ +// Package size implements run-time calculation of size of the variable. +// Source code is based on "binary.Size()" function from Go standard library. +// size.Of() omits size of slices, arrays and maps containers itself (24, 24 and 8 bytes). +// When counting maps separate calculations are done for keys and values. +package size + +import ( + "reflect" + "unsafe" +) + +// Of returns the size of 'v' in bytes. +// If there is an error during calculation, Of returns -1. +func Of(v interface{}) int { + // Cache with every visited pointer so we don't count two pointers + // to the same memory twice. + cache := make(map[uintptr]bool) + return sizeOf(reflect.Indirect(reflect.ValueOf(v)), cache) +} + +// sizeOf returns the number of bytes the actual data represented by v occupies in memory. +// If there is an error, sizeOf returns -1. 
+func sizeOf(v reflect.Value, cache map[uintptr]bool) int { + switch v.Kind() { + + case reflect.Array: + sum := 0 + for i := 0; i < v.Len(); i++ { + s := sizeOf(v.Index(i), cache) + if s < 0 { + return -1 + } + sum += s + } + + return sum + (v.Cap()-v.Len())*int(v.Type().Elem().Size()) + + case reflect.Slice: + // return 0 if this node has been visited already + if cache[v.Pointer()] { + return 0 + } + cache[v.Pointer()] = true + + sum := 0 + for i := 0; i < v.Len(); i++ { + s := sizeOf(v.Index(i), cache) + if s < 0 { + return -1 + } + sum += s + } + + sum += (v.Cap() - v.Len()) * int(v.Type().Elem().Size()) + + return sum + int(v.Type().Size()) + + case reflect.Struct: + sum := 0 + for i, n := 0, v.NumField(); i < n; i++ { + s := sizeOf(v.Field(i), cache) + if s < 0 { + return -1 + } + sum += s + } + + // Look for struct padding. + padding := int(v.Type().Size()) + for i, n := 0, v.NumField(); i < n; i++ { + padding -= int(v.Field(i).Type().Size()) + } + + return sum + padding + + case reflect.String: + s := v.String() + hdr := (*reflect.StringHeader)(unsafe.Pointer(&s)) + if cache[hdr.Data] { + return int(v.Type().Size()) + } + cache[hdr.Data] = true + return len(s) + int(v.Type().Size()) + + case reflect.Ptr: + // return Ptr size if this node has been visited already (infinite recursion) + if cache[v.Pointer()] { + return int(v.Type().Size()) + } + cache[v.Pointer()] = true + if v.IsNil() { + return int(reflect.New(v.Type()).Type().Size()) + } + s := sizeOf(reflect.Indirect(v), cache) + if s < 0 { + return -1 + } + return s + int(v.Type().Size()) + + case reflect.Bool, + reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Int, reflect.Uint, + reflect.Chan, + reflect.Uintptr, + reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128, + reflect.Func: + return int(v.Type().Size()) + + case reflect.Map: + // return 0 if this node has been visited already (infinite 
recursion) + if cache[v.Pointer()] { + return 0 + } + cache[v.Pointer()] = true + sum := 0 + keys := v.MapKeys() + for i := range keys { + val := v.MapIndex(keys[i]) + // calculate size of key and value separately + sv := sizeOf(val, cache) + if sv < 0 { + return -1 + } + sum += sv + sk := sizeOf(keys[i], cache) + if sk < 0 { + return -1 + } + sum += sk + } + // Include overhead due to unused map buckets. 10.79 comes + // from https://golang.org/src/runtime/map.go. + return sum + int(v.Type().Size()) + int(float64(len(keys))*10.79) + + case reflect.Interface: + return sizeOf(v.Elem(), cache) + int(v.Type().Size()) + + } + + return -1 +} diff --git a/vendor/github.com/dgraph-io/ristretto/.deepsource.toml b/vendor/github.com/dgraph-io/ristretto/.deepsource.toml new file mode 100644 index 0000000000..40609eff3f --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/.deepsource.toml @@ -0,0 +1,17 @@ +version = 1 + +test_patterns = [ + '**/*_test.go' +] + +exclude_patterns = [ + +] + +[[analyzers]] +name = 'go' +enabled = true + + + [analyzers.meta] + import_path = 'github.com/dgraph-io/ristretto' diff --git a/vendor/github.com/dgraph-io/ristretto/.go-version b/vendor/github.com/dgraph-io/ristretto/.go-version new file mode 100644 index 0000000000..b8f1e3fd3e --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/.go-version @@ -0,0 +1 @@ +1.17.11 diff --git a/vendor/github.com/dgraph-io/ristretto/.golangci.yml b/vendor/github.com/dgraph-io/ristretto/.golangci.yml new file mode 100644 index 0000000000..7318e9a3b6 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/.golangci.yml @@ -0,0 +1,23 @@ +run: + tests: false + skip-dirs: + - contrib + - sim + +linters-settings: + lll: + line-length: 120 + +linters: + disable-all: true + enable: + #- errcheck + #- ineffassign + - gas + #- gofmt + #- golint + #- gosimple + #- govet + - lll + #- varcheck + #- unused diff --git a/vendor/github.com/dgraph-io/ristretto/CHANGELOG.md 
b/vendor/github.com/dgraph-io/ristretto/CHANGELOG.md new file mode 100644 index 0000000000..3d18e39ed6 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/CHANGELOG.md @@ -0,0 +1,187 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +and this project will adhere to [Semantic Versioning](http://semver.org/spec/v2.0.0.html) starting v1.0.0. + +## [0.1.1] - 2022-10-12 + +[0.1.1]: https://github.com/dgraph-io/ristretto/compare/v0.1.0..v0.1.1 +This release fixes certain arm64 build issues in the z package. It also +incorporates CI steps in our repository. + +### Changed +- [chore(docs): Include SpiceDB in the list of projects using Ristretto (#285)](https://github.com/dgraph-io/ristretto/pull/311) + +### Added +- [Run CI Jobs via Github Actions #304](https://github.com/dgraph-io/ristretto/pull/304) + +### Fixed +- [fix(build): update x/sys dependency](https://github.com/dgraph-io/ristretto/pull/308) +- [fix(z): Address inconsistent mremap return arguments with arm64](https://github.com/dgraph-io/ristretto/pull/309) +- [fix(z): runtime error: index out of range for !amd64 env #287](https://github.com/dgraph-io/ristretto/pull/307) + +## [0.1.0] - 2021-06-03 + +[0.1.0]: https://github.com/dgraph-io/ristretto/compare/v0.0.3..v0.1.0 +This release contains bug fixes and improvements to Ristretto. It also contains +major updates to the z package. The z package contains types such as Tree (B+ +tree), Buffer, Mmap file, etc. All these types are used in Badger and Dgraph to +improve performance and reduce memory requirements. + +### Changed +- Make item public. Add a new onReject call for rejected items. (#180) + +### Added +- Use z.Buffer backing for B+ tree (#268) +- expose GetTTL function (#270) +- docs(README): Ristretto is production-ready. 
(#267) +- Add IterateKV (#265) +- feat(super-flags): Add GetPath method in superflags (#258) +- add GetDuration to SuperFlag (#248) +- add Has, GetFloat64, and GetInt64 to SuperFlag (#247) +- move SuperFlag to Ristretto (#246) +- add SuperFlagHelp tool to generate flag help text (#251) +- allow empty defaults in SuperFlag (#254) +- add mmaped b+ tree (#207) +- Add API to allow the MaxCost of an existing cache to be updated. (#200) +- Add OnExit handler which can be used for manual memory management (#183) +- Add life expectancy histogram (#182) +- Add mechanism to wait for items to be processed. (#184) + +### Fixed +- change expiration type from int64 to time.Time (#277) +- fix(buffer): make buffer capacity atleast defaultCapacity (#273) +- Fixes for z.PersistentTree (#272) +- Initialize persistent tree correctly (#271) +- use xxhash v2 (#266) +- update comments to correctly reflect counter space usage (#189) +- enable riscv64 builds (#264) +- Switch from log to glog (#263) +- Use Fibonacci for latency numbers +- cache: fix race when clearning a cache (#261) +- Check for keys without values in superflags (#259) +- chore(perf): using tags instead of runtime callers to improve the performance of leak detection (#255) +- fix(Flags): panic on user errors (#256) +- fix SuperFlagHelp newline (#252) +- fix(arm): Fix crashing under ARMv6 due to memory mis-alignment (#239) +- Fix incorrect unit test coverage depiction (#245) +- chore(histogram): adding percentile in histogram (#241) +- fix(windows): use filepath instead of path (#244) +- fix(MmapFile): Close the fd before deleting the file (#242) +- Fixes CGO_ENABLED=0 compilation error (#240) +- fix(build): fix build on non-amd64 architectures (#238) +- fix(b+tree): Do not double the size of btree (#237) +- fix(jemalloc): Fix the stats of jemalloc (#236) +- Don't print stuff, only return strings. +- Bring memclrNoHeapPointers to z (#235) +- increase number of buffers from 32 to 64 in allocator (#234) +- Set minSize to 1MB. 
+- Opt(btree): Use Go memory instead of mmap files +- Opt(btree): Lightweight stats calculation +- Put padding internally to z.Buffer +- Chore(z): Add SetTmpDir API to set the temp directory (#233) +- Add a BufferFrom +- Bring z.Allocator and z.AllocatorPool back +- Fix(z.Allocator): Make Allocator use Go memory +- Updated ZeroOut to use a simple for loop. (#231) +- Add concurrency back +- Add a test to check concurrency of Allocator. +- Fix(buffer): Expose padding by z.Buffer's APIs and fix test (#222) +- AllocateSlice should Truncate if the file is not big enough (#226) +- Zero out allocations for structs now that we're reusing Allocators. +- Fix the ristretto substring +- Deal with nil z.AllocatorPool +- Create an AllocatorPool class. +- chore(btree): clean NewTree API (#225) +- fix(MmapFile): Don't error out if fileSize > sz (#224) +- feat(btree): allow option to reset btree and mmaping it to specified file. (#223) +- Use mremap on Linux instead of munmap+mmap (#221) +- Reuse pages in B+ tree (#220) +- fix(allocator): make nil allocator return go byte slice (#217) +- fix(buffer): Make padding internal to z.buffer (#216) +- chore(buffer): add a parent directory field in z.Buffer (#215) +- Make Allocator concurrent +- Fix infinite loop in allocator (#214) +- Add trim func +- Use allocator pool. Turn off freelist. +- Add freelists to Allocator to reuse. +- make DeleteBelow delete values that are less than lo (#211) +- Avoid an unnecessary Load procedure in IncrementOffset. +- Add Stats method in Btree. +- chore(script): fix local test script (#210) +- fix(btree): Increase buffer size if needed. (#209) +- chore(btree): add occupancy ratio, search benchmark and compact bug fix (#208) +- Add licenses, remove prints, and fix a bug in compact +- Add IncrementOffset API for z.buffers (#206) +- Show count when printing histogram (#201) +- Zbuffer: Add LenNoPadding and make padding 8 bytes (#204) +- Allocate Go memory in case allocator is nil. 
+- Add leak detection via leak build flag and fix a leak during cache.Close. +- Add some APIs for allocator and buffer +- Sync before truncation or close. +- Handle nil MmapFile for Sync. +- Public methods must not panic after Close() (#202) +- Check for RD_ONLY correctly. +- Modify MmapFile APIs +- Add a bunch of APIs around MmapFile +- Move APIs for mmapfile creation over to z package. +- Add ZeroOut func +- Add SliceOffsets +- z: Add TotalSize method on bloom filter (#197) +- Add Msync func +- Buffer: Use 256 GB mmap size instead of MaxInt64 (#198) +- Add a simple test to check next2Pow +- Improve memory performance (#195) +- Have a way to automatically mmap a growing buffer (#196) +- Introduce Mmapped buffers and Merge Sort (#194) +- Add a way to access an allocator via reference. +- Use jemalloc.a to ensure compilation with the Go binary +- Fix up a build issue with ReadMemStats +- Add ReadMemStats function (#193) +- Allocator helps allocate memory to be used by unsafe structs (#192) +- Improve histogram output +- Move Closer from y to z (#191) +- Add histogram.Mean() method (#188) +- Introduce Calloc: Manual Memory Management via jemalloc (#186) + +## [0.0.3] - 2020-07-06 + +[0.0.3]: https://github.com/dgraph-io/ristretto/compare/v0.0.2..v0.0.3 + +### Changed + +### Added + +### Fixed + +- z: use MemHashString and xxhash.Sum64String ([#153][]) +- Check conflict key before updating expiration map. ([#154][]) +- Fix race condition in Cache.Clear ([#133][]) +- Improve handling of updated items ([#168][]) +- Fix droppedSets count while updating the item ([#171][]) + +## [0.0.2] - 2020-02-24 + +[0.0.2]: https://github.com/dgraph-io/ristretto/compare/v0.0.1..v0.0.2 + +### Added + +- Sets with TTL. ([#122][]) + +### Fixed + +- Fix the way metrics are handled for deletions. ([#111][]) +- Support nil `*Cache` values in `Clear` and `Close`. ([#119][]) +- Delete item immediately. ([#113][]) +- Remove key from policy after TTL eviction. 
([#130][]) + +[#111]: https://github.com/dgraph-io/ristretto/issues/111 +[#113]: https://github.com/dgraph-io/ristretto/issues/113 +[#119]: https://github.com/dgraph-io/ristretto/issues/119 +[#122]: https://github.com/dgraph-io/ristretto/issues/122 +[#130]: https://github.com/dgraph-io/ristretto/issues/130 + +## 0.0.1 + +First release. Basic cache functionality based on a LFU policy. diff --git a/vendor/github.com/dgraph-io/ristretto/LICENSE b/vendor/github.com/dgraph-io/ristretto/LICENSE new file mode 100644 index 0000000000..d9a10c0d8e --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/LICENSE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS diff --git a/vendor/github.com/dgraph-io/ristretto/README.md b/vendor/github.com/dgraph-io/ristretto/README.md new file mode 100644 index 0000000000..e71ae3df9e --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/README.md @@ -0,0 +1,220 @@ +# Ristretto +[![Go Doc](https://img.shields.io/badge/godoc-reference-blue.svg)](http://godoc.org/github.com/dgraph-io/ristretto) +[![ci-ristretto-tests](https://github.com/dgraph-io/ristretto/actions/workflows/ci-ristretto-tests.yml/badge.svg)](https://github.com/dgraph-io/ristretto/actions/workflows/ci-ristretto-tests.yml) +[![ci-ristretto-lint](https://github.com/dgraph-io/ristretto/actions/workflows/ci-ristretto-lint.yml/badge.svg)](https://github.com/dgraph-io/ristretto/actions/workflows/ci-ristretto-lint.yml) +[![Coverage Status](https://coveralls.io/repos/github/dgraph-io/ristretto/badge.svg?branch=main)](https://coveralls.io/github/dgraph-io/ristretto?branch=main) +[![Go Report Card](https://img.shields.io/badge/go%20report-A%2B-brightgreen)](https://goreportcard.com/report/github.com/dgraph-io/ristretto) + +Ristretto is a fast, concurrent cache library built with a focus on performance and correctness. + +The motivation to build Ristretto comes from the need for a contention-free +cache in [Dgraph][]. + +[Dgraph]: https://github.com/dgraph-io/dgraph + +## Features + +* **High Hit Ratios** - with our unique admission/eviction policy pairing, Ristretto's performance is best in class. + * **Eviction: SampledLFU** - on par with exact LRU and better performance on Search and Database traces. + * **Admission: TinyLFU** - extra performance with little memory overhead (12 bits per counter). +* **Fast Throughput** - we use a variety of techniques for managing contention and the result is excellent throughput. +* **Cost-Based Eviction** - any large new item deemed valuable can evict multiple smaller items (cost could be anything). 
+* **Fully Concurrent** - you can use as many goroutines as you want with little throughput degradation. +* **Metrics** - optional performance metrics for throughput, hit ratios, and other stats. +* **Simple API** - just figure out your ideal `Config` values and you're off and running. + +## Status + +Ristretto is production-ready. See [Projects using Ristretto](#projects-using-ristretto). + +## Table of Contents + +* [Usage](#Usage) + * [Example](#Example) + * [Config](#Config) + * [NumCounters](#Config) + * [MaxCost](#Config) + * [BufferItems](#Config) + * [Metrics](#Config) + * [OnEvict](#Config) + * [KeyToHash](#Config) + * [Cost](#Config) +* [Benchmarks](#Benchmarks) + * [Hit Ratios](#Hit-Ratios) + * [Search](#Search) + * [Database](#Database) + * [Looping](#Looping) + * [CODASYL](#CODASYL) + * [Throughput](#Throughput) + * [Mixed](#Mixed) + * [Read](#Read) + * [Write](#Write) +* [Projects using Ristretto](#projects-using-ristretto) +* [FAQ](#FAQ) + +## Usage + +### Example + +```go +func main() { + cache, err := ristretto.NewCache(&ristretto.Config{ + NumCounters: 1e7, // number of keys to track frequency of (10M). + MaxCost: 1 << 30, // maximum cost of cache (1GB). + BufferItems: 64, // number of keys per Get buffer. + }) + if err != nil { + panic(err) + } + + // set a value with a cost of 1 + cache.Set("key", "value", 1) + + // wait for value to pass through buffers + time.Sleep(10 * time.Millisecond) + + value, found := cache.Get("key") + if !found { + panic("missing value") + } + fmt.Println(value) + cache.Del("key") +} +``` + +### Config + +The `Config` struct is passed to `NewCache` when creating Ristretto instances (see the example above). + +**NumCounters** `int64` + +NumCounters is the number of 4-bit access counters to keep for admission and eviction. We've seen good performance in setting this to 10x the number of items you expect to keep in the cache when full. 
+ +For example, if you expect each item to have a cost of 1 and MaxCost is 100, set NumCounters to 1,000. Or, if you use variable cost values but expect the cache to hold around 10,000 items when full, set NumCounters to 100,000. The important thing is the *number of unique items* in the full cache, not necessarily the MaxCost value. + +**MaxCost** `int64` + +MaxCost is how eviction decisions are made. For example, if MaxCost is 100 and a new item with a cost of 1 increases total cache cost to 101, 1 item will be evicted. + +MaxCost can also be used to denote the max size in bytes. For example, if MaxCost is 1,000,000 (1MB) and the cache is full with 1,000 1KB items, a new 5KB item (that's accepted) would cause 5 1KB items to be evicted. + +MaxCost could be anything as long as it matches how you're using the cost values when calling Set. + +**BufferItems** `int64` + +BufferItems is the size of the Get buffers. The best value we've found for this is 64. + +If for some reason you see Get performance decreasing with lots of contention (you shouldn't), try increasing this value in increments of 64. This is a fine-tuning mechanism and you probably won't have to touch this. + +**Metrics** `bool` + +Metrics is true when you want real-time logging of a variety of stats. This is a Config flag because there's a 10% throughput performance overhead. + +**OnEvict** `func(item *Item)` + +OnEvict is called for every eviction and is passed the evicted item (hashed key, value, and cost). + +**KeyToHash** `func(key interface{}) (uint64, uint64)` + +KeyToHash is the hashing algorithm used for every key. If this is nil, Ristretto has a variety of [defaults depending on the underlying interface type](https://github.com/dgraph-io/ristretto/blob/master/z/z.go#L19-L41). + +Note that if you want 128bit hashes you should return both `uint64` values, +otherwise just fill the first `uint64` (leaving the second as 0) and it will behave like +any 64bit hash.
+ +**Cost** `func(value interface{}) int64` + +Cost is an optional function you can pass to the Config in order to evaluate +item cost at runtime, and only for the Set calls that aren't dropped (this is +useful if calculating item cost is particularly expensive and you don't want to +waste time on items that will be dropped anyways). + +To signal to Ristretto that you'd like to use this Cost function: + +1. Set the Cost field to a non-nil function. +2. When calling Set for new items or item updates, use a `cost` of 0. + +## Benchmarks + +The benchmarks can be found in https://github.com/dgraph-io/benchmarks/tree/master/cachebench/ristretto. + +### Hit Ratios + +#### Search + +This trace is described as "disk read accesses initiated by a large commercial +search engine in response to various web search requests." + +

+ +

+ +#### Database + +This trace is described as "a database server running at a commercial site +running an ERP application on top of a commercial database." + +

+ +

+ +#### Looping + +This trace demonstrates a looping access pattern. + +

+ +

+ +#### CODASYL + +This trace is described as "references to a CODASYL database for a one hour +period." + +

+ +

+ +### Throughput + +All throughput benchmarks were ran on an Intel Core i7-8700K (3.7GHz) with 16gb +of RAM. + +#### Mixed + +

+ +

+ +#### Read + +

+ +

+ +#### Write + +

+ +

+ +## Projects Using Ristretto + +Below is a list of known projects that use Ristretto: + +- [Badger](https://github.com/dgraph-io/badger) - Embeddable key-value DB in Go +- [Dgraph](https://github.com/dgraph-io/dgraph) - Horizontally scalable and distributed GraphQL database with a graph backend +- [Vitess](https://github.com/vitessio/vitess) - Database clustering system for horizontal scaling of MySQL +- [SpiceDB](https://github.com/authzed/spicedb) - Horizontally scalable permissions database + +## FAQ + +### How are you achieving this performance? What shortcuts are you taking? + +We go into detail in the [Ristretto blog post](https://blog.dgraph.io/post/introducing-ristretto-high-perf-go-cache/), but in short: our throughput performance can be attributed to a mix of batching and eventual consistency. Our hit ratio performance is mostly due to an excellent [admission policy](https://arxiv.org/abs/1512.00727) and SampledLFU eviction policy. + +As for "shortcuts," the only thing Ristretto does that could be construed as one is dropping some Set calls. That means a Set call for a new item (updates are guaranteed) isn't guaranteed to make it into the cache. The new item could be dropped at two points: when passing through the Set buffer or when passing through the admission policy. However, this doesn't affect hit ratios much at all as we expect the most popular items to be Set multiple times and eventually make it in the cache. + +### Is Ristretto distributed? + +No, it's just like any other Go library that you can import into your project and use in a single process. diff --git a/vendor/github.com/dgraph-io/ristretto/cache.go b/vendor/github.com/dgraph-io/ristretto/cache.go new file mode 100644 index 0000000000..7226245bcc --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/cache.go @@ -0,0 +1,719 @@ +/* + * Copyright 2019 Dgraph Labs, Inc. 
and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Ristretto is a fast, fixed size, in-memory cache with a dual focus on +// throughput and hit ratio performance. You can easily add Ristretto to an +// existing system and keep the most valuable data where you need it. +package ristretto + +import ( + "bytes" + "errors" + "fmt" + "sync" + "sync/atomic" + "time" + "unsafe" + + "github.com/dgraph-io/ristretto/z" +) + +var ( + // TODO: find the optimal value for this or make it configurable + setBufSize = 32 * 1024 +) + +type itemCallback func(*Item) + +const itemSize = int64(unsafe.Sizeof(storeItem{})) + +// Cache is a thread-safe implementation of a hashmap with a TinyLFU admission +// policy and a Sampled LFU eviction policy. You can use the same Cache instance +// from as many goroutines as you want. +type Cache struct { + // store is the central concurrent hashmap where key-value items are stored. + store store + // policy determines what gets let in to the cache and what gets kicked out. + policy policy + // getBuf is a custom ring buffer implementation that gets pushed to when + // keys are read. + getBuf *ringBuffer + // setBuf is a buffer allowing us to batch/drop Sets during times of high + // contention. + setBuf chan *Item + // onEvict is called for item evictions. + onEvict itemCallback + // onReject is called when an item is rejected via admission policy. 
+ onReject itemCallback + // onExit is called whenever a value goes out of scope from the cache. + onExit (func(interface{})) + // KeyToHash function is used to customize the key hashing algorithm. + // Each key will be hashed using the provided function. If keyToHash value + // is not set, the default keyToHash function is used. + keyToHash func(interface{}) (uint64, uint64) + // stop is used to stop the processItems goroutine. + stop chan struct{} + // indicates whether cache is closed. + isClosed bool + // cost calculates cost from a value. + cost func(value interface{}) int64 + // ignoreInternalCost dictates whether to ignore the cost of internally storing + // the item in the cost calculation. + ignoreInternalCost bool + // cleanupTicker is used to periodically check for entries whose TTL has passed. + cleanupTicker *time.Ticker + // Metrics contains a running log of important statistics like hits, misses, + // and dropped items. + Metrics *Metrics +} + +// Config is passed to NewCache for creating new Cache instances. +type Config struct { + // NumCounters determines the number of counters (keys) to keep that hold + // access frequency information. It's generally a good idea to have more + // counters than the max cache capacity, as this will improve eviction + // accuracy and subsequent hit ratios. + // + // For example, if you expect your cache to hold 1,000,000 items when full, + // NumCounters should be 10,000,000 (10x). Each counter takes up roughly + // 3 bytes (4 bits for each counter * 4 copies plus about a byte per + // counter for the bloom filter). Note that the number of counters is + // internally rounded up to the nearest power of 2, so the space usage + // may be a little larger than 3 bytes * NumCounters. + NumCounters int64 + // MaxCost can be considered as the cache capacity, in whatever units you + // choose to use. 
+ // + // For example, if you want the cache to have a max capacity of 100MB, you + // would set MaxCost to 100,000,000 and pass an item's number of bytes as + // the `cost` parameter for calls to Set. If new items are accepted, the + // eviction process will take care of making room for the new item and not + // overflowing the MaxCost value. + MaxCost int64 + // BufferItems determines the size of Get buffers. + // + // Unless you have a rare use case, using `64` as the BufferItems value + // results in good performance. + BufferItems int64 + // Metrics determines whether cache statistics are kept during the cache's + // lifetime. There *is* some overhead to keeping statistics, so you should + // only set this flag to true when testing or throughput performance isn't a + // major factor. + Metrics bool + // OnEvict is called for every eviction and passes the hashed key, value, + // and cost to the function. + OnEvict func(item *Item) + // OnReject is called for every rejection done via the policy. + OnReject func(item *Item) + // OnExit is called whenever a value is removed from cache. This can be + // used to do manual memory deallocation. Would also be called on eviction + // and rejection of the value. + OnExit func(val interface{}) + // KeyToHash function is used to customize the key hashing algorithm. + // Each key will be hashed using the provided function. If keyToHash value + // is not set, the default keyToHash function is used. + KeyToHash func(key interface{}) (uint64, uint64) + // Cost evaluates a value and outputs a corresponding cost. This function + // is ran after Set is called for a new item or an item update with a cost + // param of 0. + Cost func(value interface{}) int64 + // IgnoreInternalCost set to true indicates to the cache that the cost of + // internally storing the value should be ignored. This is useful when the + // cost passed to set is not using bytes as units. 
Keep in mind that setting + // this to true will increase the memory usage. + IgnoreInternalCost bool +} + +type itemFlag byte + +const ( + itemNew itemFlag = iota + itemDelete + itemUpdate +) + +// Item is passed to setBuf so items can eventually be added to the cache. +type Item struct { + flag itemFlag + Key uint64 + Conflict uint64 + Value interface{} + Cost int64 + Expiration time.Time + wg *sync.WaitGroup +} + +// NewCache returns a new Cache instance and any configuration errors, if any. +func NewCache(config *Config) (*Cache, error) { + switch { + case config.NumCounters == 0: + return nil, errors.New("NumCounters can't be zero") + case config.MaxCost == 0: + return nil, errors.New("MaxCost can't be zero") + case config.BufferItems == 0: + return nil, errors.New("BufferItems can't be zero") + } + policy := newPolicy(config.NumCounters, config.MaxCost) + cache := &Cache{ + store: newStore(), + policy: policy, + getBuf: newRingBuffer(policy, config.BufferItems), + setBuf: make(chan *Item, setBufSize), + keyToHash: config.KeyToHash, + stop: make(chan struct{}), + cost: config.Cost, + ignoreInternalCost: config.IgnoreInternalCost, + cleanupTicker: time.NewTicker(time.Duration(bucketDurationSecs) * time.Second / 2), + } + cache.onExit = func(val interface{}) { + if config.OnExit != nil && val != nil { + config.OnExit(val) + } + } + cache.onEvict = func(item *Item) { + if config.OnEvict != nil { + config.OnEvict(item) + } + cache.onExit(item.Value) + } + cache.onReject = func(item *Item) { + if config.OnReject != nil { + config.OnReject(item) + } + cache.onExit(item.Value) + } + if cache.keyToHash == nil { + cache.keyToHash = z.KeyToHash + } + if config.Metrics { + cache.collectMetrics() + } + // NOTE: benchmarks seem to show that performance decreases the more + // goroutines we have running cache.processItems(), so 1 should + // usually be sufficient + go cache.processItems() + return cache, nil +} + +func (c *Cache) Wait() { + if c == nil || c.isClosed { + 
return + } + wg := &sync.WaitGroup{} + wg.Add(1) + c.setBuf <- &Item{wg: wg} + wg.Wait() +} + +// Get returns the value (if any) and a boolean representing whether the +// value was found or not. The value can be nil and the boolean can be true at +// the same time. +func (c *Cache) Get(key interface{}) (interface{}, bool) { + if c == nil || c.isClosed || key == nil { + return nil, false + } + keyHash, conflictHash := c.keyToHash(key) + c.getBuf.Push(keyHash) + value, ok := c.store.Get(keyHash, conflictHash) + if ok { + c.Metrics.add(hit, keyHash, 1) + } else { + c.Metrics.add(miss, keyHash, 1) + } + return value, ok +} + +// Set attempts to add the key-value item to the cache. If it returns false, +// then the Set was dropped and the key-value item isn't added to the cache. If +// it returns true, there's still a chance it could be dropped by the policy if +// its determined that the key-value item isn't worth keeping, but otherwise the +// item will be added and other items will be evicted in order to make room. +// +// To dynamically evaluate the items cost using the Config.Coster function, set +// the cost parameter to 0 and Coster will be ran when needed in order to find +// the items true cost. +func (c *Cache) Set(key, value interface{}, cost int64) bool { + return c.SetWithTTL(key, value, cost, 0*time.Second) +} + +// SetWithTTL works like Set but adds a key-value pair to the cache that will expire +// after the specified TTL (time to live) has passed. A zero value means the value never +// expires, which is identical to calling Set. A negative value is a no-op and the value +// is discarded. +func (c *Cache) SetWithTTL(key, value interface{}, cost int64, ttl time.Duration) bool { + if c == nil || c.isClosed || key == nil { + return false + } + + var expiration time.Time + switch { + case ttl == 0: + // No expiration. + break + case ttl < 0: + // Treat this a a no-op. 
+ return false + default: + expiration = time.Now().Add(ttl) + } + + keyHash, conflictHash := c.keyToHash(key) + i := &Item{ + flag: itemNew, + Key: keyHash, + Conflict: conflictHash, + Value: value, + Cost: cost, + Expiration: expiration, + } + // cost is eventually updated. The expiration must also be immediately updated + // to prevent items from being prematurely removed from the map. + if prev, ok := c.store.Update(i); ok { + c.onExit(prev) + i.flag = itemUpdate + } + // Attempt to send item to policy. + select { + case c.setBuf <- i: + return true + default: + if i.flag == itemUpdate { + // Return true if this was an update operation since we've already + // updated the store. For all the other operations (set/delete), we + // return false which means the item was not inserted. + return true + } + c.Metrics.add(dropSets, keyHash, 1) + return false + } +} + +// Del deletes the key-value item from the cache if it exists. +func (c *Cache) Del(key interface{}) { + if c == nil || c.isClosed || key == nil { + return + } + keyHash, conflictHash := c.keyToHash(key) + // Delete immediately. + _, prev := c.store.Del(keyHash, conflictHash) + c.onExit(prev) + // If we've set an item, it would be applied slightly later. + // So we must push the same item to `setBuf` with the deletion flag. + // This ensures that if a set is followed by a delete, it will be + // applied in the correct order. + c.setBuf <- &Item{ + flag: itemDelete, + Key: keyHash, + Conflict: conflictHash, + } +} + +// GetTTL returns the TTL for the specified key and a bool that is true if the +// item was found and is not expired. 
+func (c *Cache) GetTTL(key interface{}) (time.Duration, bool) { + if c == nil || key == nil { + return 0, false + } + + keyHash, conflictHash := c.keyToHash(key) + if _, ok := c.store.Get(keyHash, conflictHash); !ok { + // not found + return 0, false + } + + expiration := c.store.Expiration(keyHash) + if expiration.IsZero() { + // found but no expiration + return 0, true + } + + if time.Now().After(expiration) { + // found but expired + return 0, false + } + + return time.Until(expiration), true +} + +// Close stops all goroutines and closes all channels. +func (c *Cache) Close() { + if c == nil || c.isClosed { + return + } + c.Clear() + + // Block until processItems goroutine is returned. + c.stop <- struct{}{} + close(c.stop) + close(c.setBuf) + c.policy.Close() + c.isClosed = true +} + +// Clear empties the hashmap and zeroes all policy counters. Note that this is +// not an atomic operation (but that shouldn't be a problem as it's assumed that +// Set/Get calls won't be occurring until after this). +func (c *Cache) Clear() { + if c == nil || c.isClosed { + return + } + // Block until processItems goroutine is returned. + c.stop <- struct{}{} + + // Clear out the setBuf channel. +loop: + for { + select { + case i := <-c.setBuf: + if i.wg != nil { + i.wg.Done() + continue + } + if i.flag != itemUpdate { + // In itemUpdate, the value is already set in the store. So, no need to call + // onEvict here. + c.onEvict(i) + } + default: + break loop + } + } + + // Clear value hashmap and policy data. + c.policy.Clear() + c.store.Clear(c.onEvict) + // Only reset metrics if they're enabled. + if c.Metrics != nil { + c.Metrics.Clear() + } + // Restart processItems goroutine. + go c.processItems() +} + +// MaxCost returns the max cost of the cache. +func (c *Cache) MaxCost() int64 { + if c == nil { + return 0 + } + return c.policy.MaxCost() +} + +// UpdateMaxCost updates the maxCost of an existing cache. 
+func (c *Cache) UpdateMaxCost(maxCost int64) { + if c == nil { + return + } + c.policy.UpdateMaxCost(maxCost) +} + +// processItems is ran by goroutines processing the Set buffer. +func (c *Cache) processItems() { + startTs := make(map[uint64]time.Time) + numToKeep := 100000 // TODO: Make this configurable via options. + + trackAdmission := func(key uint64) { + if c.Metrics == nil { + return + } + startTs[key] = time.Now() + if len(startTs) > numToKeep { + for k := range startTs { + if len(startTs) <= numToKeep { + break + } + delete(startTs, k) + } + } + } + onEvict := func(i *Item) { + if ts, has := startTs[i.Key]; has { + c.Metrics.trackEviction(int64(time.Since(ts) / time.Second)) + delete(startTs, i.Key) + } + if c.onEvict != nil { + c.onEvict(i) + } + } + + for { + select { + case i := <-c.setBuf: + if i.wg != nil { + i.wg.Done() + continue + } + // Calculate item cost value if new or update. + if i.Cost == 0 && c.cost != nil && i.flag != itemDelete { + i.Cost = c.cost(i.Value) + } + if !c.ignoreInternalCost { + // Add the cost of internally storing the object. + i.Cost += itemSize + } + + switch i.flag { + case itemNew: + victims, added := c.policy.Add(i.Key, i.Cost) + if added { + c.store.Set(i) + c.Metrics.add(keyAdd, i.Key, 1) + trackAdmission(i.Key) + } else { + c.onReject(i) + } + for _, victim := range victims { + victim.Conflict, victim.Value = c.store.Del(victim.Key, 0) + onEvict(victim) + } + + case itemUpdate: + c.policy.Update(i.Key, i.Cost) + + case itemDelete: + c.policy.Del(i.Key) // Deals with metrics updates. + _, val := c.store.Del(i.Key, i.Conflict) + c.onExit(val) + } + case <-c.cleanupTicker.C: + c.store.Cleanup(c.policy, onEvict) + case <-c.stop: + return + } + } +} + +// collectMetrics just creates a new *Metrics instance and adds the pointers +// to the cache and policy instances. 
+func (c *Cache) collectMetrics() { + c.Metrics = newMetrics() + c.policy.CollectMetrics(c.Metrics) +} + +type metricType int + +const ( + // The following 2 keep track of hits and misses. + hit = iota + miss + // The following 3 keep track of number of keys added, updated and evicted. + keyAdd + keyUpdate + keyEvict + // The following 2 keep track of cost of keys added and evicted. + costAdd + costEvict + // The following keep track of how many sets were dropped or rejected later. + dropSets + rejectSets + // The following 2 keep track of how many gets were kept and dropped on the + // floor. + dropGets + keepGets + // This should be the final enum. Other enums should be set before this. + doNotUse +) + +func stringFor(t metricType) string { + switch t { + case hit: + return "hit" + case miss: + return "miss" + case keyAdd: + return "keys-added" + case keyUpdate: + return "keys-updated" + case keyEvict: + return "keys-evicted" + case costAdd: + return "cost-added" + case costEvict: + return "cost-evicted" + case dropSets: + return "sets-dropped" + case rejectSets: + return "sets-rejected" // by policy. + case dropGets: + return "gets-dropped" + case keepGets: + return "gets-kept" + default: + return "unidentified" + } +} + +// Metrics is a snapshot of performance statistics for the lifetime of a cache instance. +type Metrics struct { + all [doNotUse][]*uint64 + + mu sync.RWMutex + life *z.HistogramData // Tracks the life expectancy of a key. +} + +func newMetrics() *Metrics { + s := &Metrics{ + life: z.NewHistogramData(z.HistogramBounds(1, 16)), + } + for i := 0; i < doNotUse; i++ { + s.all[i] = make([]*uint64, 256) + slice := s.all[i] + for j := range slice { + slice[j] = new(uint64) + } + } + return s +} + +func (p *Metrics) add(t metricType, hash, delta uint64) { + if p == nil { + return + } + valp := p.all[t] + // Avoid false sharing by padding at least 64 bytes of space between two + // atomic counters which would be incremented. 
+ idx := (hash % 25) * 10 + atomic.AddUint64(valp[idx], delta) +} + +func (p *Metrics) get(t metricType) uint64 { + if p == nil { + return 0 + } + valp := p.all[t] + var total uint64 + for i := range valp { + total += atomic.LoadUint64(valp[i]) + } + return total +} + +// Hits is the number of Get calls where a value was found for the corresponding key. +func (p *Metrics) Hits() uint64 { + return p.get(hit) +} + +// Misses is the number of Get calls where a value was not found for the corresponding key. +func (p *Metrics) Misses() uint64 { + return p.get(miss) +} + +// KeysAdded is the total number of Set calls where a new key-value item was added. +func (p *Metrics) KeysAdded() uint64 { + return p.get(keyAdd) +} + +// KeysUpdated is the total number of Set calls where the value was updated. +func (p *Metrics) KeysUpdated() uint64 { + return p.get(keyUpdate) +} + +// KeysEvicted is the total number of keys evicted. +func (p *Metrics) KeysEvicted() uint64 { + return p.get(keyEvict) +} + +// CostAdded is the sum of costs that have been added (successful Set calls). +func (p *Metrics) CostAdded() uint64 { + return p.get(costAdd) +} + +// CostEvicted is the sum of all costs that have been evicted. +func (p *Metrics) CostEvicted() uint64 { + return p.get(costEvict) +} + +// SetsDropped is the number of Set calls that don't make it into internal +// buffers (due to contention or some other reason). +func (p *Metrics) SetsDropped() uint64 { + return p.get(dropSets) +} + +// SetsRejected is the number of Set calls rejected by the policy (TinyLFU). +func (p *Metrics) SetsRejected() uint64 { + return p.get(rejectSets) +} + +// GetsDropped is the number of Get counter increments that are dropped +// internally. +func (p *Metrics) GetsDropped() uint64 { + return p.get(dropGets) +} + +// GetsKept is the number of Get counter increments that are kept. 
+func (p *Metrics) GetsKept() uint64 { + return p.get(keepGets) +} + +// Ratio is the number of Hits over all accesses (Hits + Misses). This is the +// percentage of successful Get calls. +func (p *Metrics) Ratio() float64 { + if p == nil { + return 0.0 + } + hits, misses := p.get(hit), p.get(miss) + if hits == 0 && misses == 0 { + return 0.0 + } + return float64(hits) / float64(hits+misses) +} + +func (p *Metrics) trackEviction(numSeconds int64) { + if p == nil { + return + } + p.mu.Lock() + defer p.mu.Unlock() + p.life.Update(numSeconds) +} + +func (p *Metrics) LifeExpectancySeconds() *z.HistogramData { + if p == nil { + return nil + } + p.mu.RLock() + defer p.mu.RUnlock() + return p.life.Copy() +} + +// Clear resets all the metrics. +func (p *Metrics) Clear() { + if p == nil { + return + } + for i := 0; i < doNotUse; i++ { + for j := range p.all[i] { + atomic.StoreUint64(p.all[i][j], 0) + } + } + p.mu.Lock() + p.life = z.NewHistogramData(z.HistogramBounds(1, 16)) + p.mu.Unlock() +} + +// String returns a string representation of the metrics. +func (p *Metrics) String() string { + if p == nil { + return "" + } + var buf bytes.Buffer + for i := 0; i < doNotUse; i++ { + t := metricType(i) + fmt.Fprintf(&buf, "%s: %d ", stringFor(t), p.get(t)) + } + fmt.Fprintf(&buf, "gets-total: %d ", p.get(hit)+p.get(miss)) + fmt.Fprintf(&buf, "hit-ratio: %.2f", p.Ratio()) + return buf.String() +} diff --git a/vendor/github.com/dgraph-io/ristretto/policy.go b/vendor/github.com/dgraph-io/ristretto/policy.go new file mode 100644 index 0000000000..bf23f91fd9 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/policy.go @@ -0,0 +1,423 @@ +/* + * Copyright 2020 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ristretto + +import ( + "math" + "sync" + "sync/atomic" + + "github.com/dgraph-io/ristretto/z" +) + +const ( + // lfuSample is the number of items to sample when looking at eviction + // candidates. 5 seems to be the most optimal number [citation needed]. + lfuSample = 5 +) + +// policy is the interface encapsulating eviction/admission behavior. +// +// TODO: remove this interface and just rename defaultPolicy to policy, as we +// are probably only going to use/implement/maintain one policy. +type policy interface { + ringConsumer + // Add attempts to Add the key-cost pair to the Policy. It returns a slice + // of evicted keys and a bool denoting whether or not the key-cost pair + // was added. If it returns true, the key should be stored in cache. + Add(uint64, int64) ([]*Item, bool) + // Has returns true if the key exists in the Policy. + Has(uint64) bool + // Del deletes the key from the Policy. + Del(uint64) + // Cap returns the available capacity. + Cap() int64 + // Close stops all goroutines and closes all channels. + Close() + // Update updates the cost value for the key. + Update(uint64, int64) + // Cost returns the cost value of a key or -1 if missing. + Cost(uint64) int64 + // Optionally, set stats object to track how policy is performing. + CollectMetrics(*Metrics) + // Clear zeroes out all counters and clears hashmaps. + Clear() + // MaxCost returns the current max cost of the cache policy. + MaxCost() int64 + // UpdateMaxCost updates the max cost of the cache policy. 
+ UpdateMaxCost(int64) +} + +func newPolicy(numCounters, maxCost int64) policy { + return newDefaultPolicy(numCounters, maxCost) +} + +type defaultPolicy struct { + sync.Mutex + admit *tinyLFU + evict *sampledLFU + itemsCh chan []uint64 + stop chan struct{} + isClosed bool + metrics *Metrics +} + +func newDefaultPolicy(numCounters, maxCost int64) *defaultPolicy { + p := &defaultPolicy{ + admit: newTinyLFU(numCounters), + evict: newSampledLFU(maxCost), + itemsCh: make(chan []uint64, 3), + stop: make(chan struct{}), + } + go p.processItems() + return p +} + +func (p *defaultPolicy) CollectMetrics(metrics *Metrics) { + p.metrics = metrics + p.evict.metrics = metrics +} + +type policyPair struct { + key uint64 + cost int64 +} + +func (p *defaultPolicy) processItems() { + for { + select { + case items := <-p.itemsCh: + p.Lock() + p.admit.Push(items) + p.Unlock() + case <-p.stop: + return + } + } +} + +func (p *defaultPolicy) Push(keys []uint64) bool { + if p.isClosed { + return false + } + + if len(keys) == 0 { + return true + } + + select { + case p.itemsCh <- keys: + p.metrics.add(keepGets, keys[0], uint64(len(keys))) + return true + default: + p.metrics.add(dropGets, keys[0], uint64(len(keys))) + return false + } +} + +// Add decides whether the item with the given key and cost should be accepted by +// the policy. It returns the list of victims that have been evicted and a boolean +// indicating whether the incoming item should be accepted. +func (p *defaultPolicy) Add(key uint64, cost int64) ([]*Item, bool) { + p.Lock() + defer p.Unlock() + + // Cannot add an item bigger than entire cache. + if cost > p.evict.getMaxCost() { + return nil, false + } + + // No need to go any further if the item is already in the cache. + if has := p.evict.updateIfHas(key, cost); has { + // An update does not count as an addition, so return false. + return nil, false + } + + // If the execution reaches this point, the key doesn't exist in the cache. 
+ // Calculate the remaining room in the cache (usually bytes). + room := p.evict.roomLeft(cost) + if room >= 0 { + // There's enough room in the cache to store the new item without + // overflowing. Do that now and stop here. + p.evict.add(key, cost) + p.metrics.add(costAdd, key, uint64(cost)) + return nil, true + } + + // incHits is the hit count for the incoming item. + incHits := p.admit.Estimate(key) + // sample is the eviction candidate pool to be filled via random sampling. + // TODO: perhaps we should use a min heap here. Right now our time + // complexity is N for finding the min. Min heap should bring it down to + // O(lg N). + sample := make([]*policyPair, 0, lfuSample) + // As items are evicted they will be appended to victims. + victims := make([]*Item, 0) + + // Delete victims until there's enough space or a minKey is found that has + // more hits than incoming item. + for ; room < 0; room = p.evict.roomLeft(cost) { + // Fill up empty slots in sample. + sample = p.evict.fillSample(sample) + + // Find minimally used item in sample. + minKey, minHits, minId, minCost := uint64(0), int64(math.MaxInt64), 0, int64(0) + for i, pair := range sample { + // Look up hit count for sample key. + if hits := p.admit.Estimate(pair.key); hits < minHits { + minKey, minHits, minId, minCost = pair.key, hits, i, pair.cost + } + } + + // If the incoming item isn't worth keeping in the policy, reject. + if incHits < minHits { + p.metrics.add(rejectSets, key, 1) + return victims, false + } + + // Delete the victim from metadata. + p.evict.del(minKey) + + // Delete the victim from sample. + sample[minId] = sample[len(sample)-1] + sample = sample[:len(sample)-1] + // Store victim in evicted victims slice. 
+ victims = append(victims, &Item{ + Key: minKey, + Conflict: 0, + Cost: minCost, + }) + } + + p.evict.add(key, cost) + p.metrics.add(costAdd, key, uint64(cost)) + return victims, true +} + +func (p *defaultPolicy) Has(key uint64) bool { + p.Lock() + _, exists := p.evict.keyCosts[key] + p.Unlock() + return exists +} + +func (p *defaultPolicy) Del(key uint64) { + p.Lock() + p.evict.del(key) + p.Unlock() +} + +func (p *defaultPolicy) Cap() int64 { + p.Lock() + capacity := int64(p.evict.getMaxCost() - p.evict.used) + p.Unlock() + return capacity +} + +func (p *defaultPolicy) Update(key uint64, cost int64) { + p.Lock() + p.evict.updateIfHas(key, cost) + p.Unlock() +} + +func (p *defaultPolicy) Cost(key uint64) int64 { + p.Lock() + if cost, found := p.evict.keyCosts[key]; found { + p.Unlock() + return cost + } + p.Unlock() + return -1 +} + +func (p *defaultPolicy) Clear() { + p.Lock() + p.admit.clear() + p.evict.clear() + p.Unlock() +} + +func (p *defaultPolicy) Close() { + if p.isClosed { + return + } + + // Block until the p.processItems goroutine returns. + p.stop <- struct{}{} + close(p.stop) + close(p.itemsCh) + p.isClosed = true +} + +func (p *defaultPolicy) MaxCost() int64 { + if p == nil || p.evict == nil { + return 0 + } + return p.evict.getMaxCost() +} + +func (p *defaultPolicy) UpdateMaxCost(maxCost int64) { + if p == nil || p.evict == nil { + return + } + p.evict.updateMaxCost(maxCost) +} + +// sampledLFU is an eviction helper storing key-cost pairs. +type sampledLFU struct { + // NOTE: align maxCost to 64-bit boundary for use with atomic. + // As per https://golang.org/pkg/sync/atomic/: "On ARM, x86-32, + // and 32-bit MIPS, it is the caller’s responsibility to arrange + // for 64-bit alignment of 64-bit words accessed atomically. + // The first word in a variable or in an allocated struct, array, + // or slice can be relied upon to be 64-bit aligned." 
+ maxCost int64 + used int64 + metrics *Metrics + keyCosts map[uint64]int64 +} + +func newSampledLFU(maxCost int64) *sampledLFU { + return &sampledLFU{ + keyCosts: make(map[uint64]int64), + maxCost: maxCost, + } +} + +func (p *sampledLFU) getMaxCost() int64 { + return atomic.LoadInt64(&p.maxCost) +} + +func (p *sampledLFU) updateMaxCost(maxCost int64) { + atomic.StoreInt64(&p.maxCost, maxCost) +} + +func (p *sampledLFU) roomLeft(cost int64) int64 { + return p.getMaxCost() - (p.used + cost) +} + +func (p *sampledLFU) fillSample(in []*policyPair) []*policyPair { + if len(in) >= lfuSample { + return in + } + for key, cost := range p.keyCosts { + in = append(in, &policyPair{key, cost}) + if len(in) >= lfuSample { + return in + } + } + return in +} + +func (p *sampledLFU) del(key uint64) { + cost, ok := p.keyCosts[key] + if !ok { + return + } + p.used -= cost + delete(p.keyCosts, key) + p.metrics.add(costEvict, key, uint64(cost)) + p.metrics.add(keyEvict, key, 1) +} + +func (p *sampledLFU) add(key uint64, cost int64) { + p.keyCosts[key] = cost + p.used += cost +} + +func (p *sampledLFU) updateIfHas(key uint64, cost int64) bool { + if prev, found := p.keyCosts[key]; found { + // Update the cost of an existing key, but don't worry about evicting. + // Evictions will be handled the next time a new item is added. + p.metrics.add(keyUpdate, key, 1) + if prev > cost { + diff := prev - cost + p.metrics.add(costAdd, key, ^uint64(uint64(diff)-1)) + } else if cost > prev { + diff := cost - prev + p.metrics.add(costAdd, key, uint64(diff)) + } + p.used += cost - prev + p.keyCosts[key] = cost + return true + } + return false +} + +func (p *sampledLFU) clear() { + p.used = 0 + p.keyCosts = make(map[uint64]int64) +} + +// tinyLFU is an admission helper that keeps track of access frequency using +// tiny (4-bit) counters in the form of a count-min sketch. +// tinyLFU is NOT thread safe. 
+type tinyLFU struct { + freq *cmSketch + door *z.Bloom + incrs int64 + resetAt int64 +} + +func newTinyLFU(numCounters int64) *tinyLFU { + return &tinyLFU{ + freq: newCmSketch(numCounters), + door: z.NewBloomFilter(float64(numCounters), 0.01), + resetAt: numCounters, + } +} + +func (p *tinyLFU) Push(keys []uint64) { + for _, key := range keys { + p.Increment(key) + } +} + +func (p *tinyLFU) Estimate(key uint64) int64 { + hits := p.freq.Estimate(key) + if p.door.Has(key) { + hits++ + } + return hits +} + +func (p *tinyLFU) Increment(key uint64) { + // Flip doorkeeper bit if not already done. + if added := p.door.AddIfNotHas(key); !added { + // Increment count-min counter if doorkeeper bit is already set. + p.freq.Increment(key) + } + p.incrs++ + if p.incrs >= p.resetAt { + p.reset() + } +} + +func (p *tinyLFU) reset() { + // Zero out incrs. + p.incrs = 0 + // clears doorkeeper bits + p.door.Clear() + // halves count-min counters + p.freq.Reset() +} + +func (p *tinyLFU) clear() { + p.incrs = 0 + p.door.Clear() + p.freq.Clear() +} diff --git a/vendor/github.com/dgraph-io/ristretto/ring.go b/vendor/github.com/dgraph-io/ristretto/ring.go new file mode 100644 index 0000000000..5dbed4cc59 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/ring.go @@ -0,0 +1,91 @@ +/* + * Copyright 2019 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package ristretto + +import ( + "sync" +) + +// ringConsumer is the user-defined object responsible for receiving and +// processing items in batches when buffers are drained. +type ringConsumer interface { + Push([]uint64) bool +} + +// ringStripe is a singular ring buffer that is not concurrent safe. +type ringStripe struct { + cons ringConsumer + data []uint64 + capa int +} + +func newRingStripe(cons ringConsumer, capa int64) *ringStripe { + return &ringStripe{ + cons: cons, + data: make([]uint64, 0, capa), + capa: int(capa), + } +} + +// Push appends an item in the ring buffer and drains (copies items and +// sends to Consumer) if full. +func (s *ringStripe) Push(item uint64) { + s.data = append(s.data, item) + // Decide if the ring buffer should be drained. + if len(s.data) >= s.capa { + // Send elements to consumer and create a new ring stripe. + if s.cons.Push(s.data) { + s.data = make([]uint64, 0, s.capa) + } else { + s.data = s.data[:0] + } + } +} + +// ringBuffer stores multiple buffers (stripes) and distributes Pushed items +// between them to lower contention. +// +// This implements the "batching" process described in the BP-Wrapper paper +// (section III part A). +type ringBuffer struct { + pool *sync.Pool +} + +// newRingBuffer returns a striped ring buffer. The Consumer in ringConfig will +// be called when individual stripes are full and need to drain their elements. +func newRingBuffer(cons ringConsumer, capa int64) *ringBuffer { + // LOSSY buffers use a very simple sync.Pool for concurrently reusing + // stripes. We do lose some stripes due to GC (unheld items in sync.Pool + // are cleared), but the performance gains generally outweigh the small + // percentage of elements lost. The performance primarily comes from + // low-level runtime functions used in the standard library that aren't + // available to us (such as runtime_procPin()). 
+ return &ringBuffer{ + pool: &sync.Pool{ + New: func() interface{} { return newRingStripe(cons, capa) }, + }, + } +} + +// Push adds an element to one of the internal stripes and possibly drains if +// the stripe becomes full. +func (b *ringBuffer) Push(item uint64) { + // Reuse or create a new stripe. + stripe := b.pool.Get().(*ringStripe) + stripe.Push(item) + b.pool.Put(stripe) +} diff --git a/vendor/github.com/dgraph-io/ristretto/sketch.go b/vendor/github.com/dgraph-io/ristretto/sketch.go new file mode 100644 index 0000000000..6368d2bde0 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/sketch.go @@ -0,0 +1,156 @@ +/* + * Copyright 2019 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This package includes multiple probabalistic data structures needed for +// admission/eviction metadata. Most are Counting Bloom Filter variations, but +// a caching-specific feature that is also required is a "freshness" mechanism, +// which basically serves as a "lifetime" process. This freshness mechanism +// was described in the original TinyLFU paper [1], but other mechanisms may +// be better suited for certain data distributions. +// +// [1]: https://arxiv.org/abs/1512.00727 +package ristretto + +import ( + "fmt" + "math/rand" + "time" +) + +// cmSketch is a Count-Min sketch implementation with 4-bit counters, heavily +// based on Damian Gryski's CM4 [1]. 
+// +// [1]: https://github.com/dgryski/go-tinylfu/blob/master/cm4.go +type cmSketch struct { + rows [cmDepth]cmRow + seed [cmDepth]uint64 + mask uint64 +} + +const ( + // cmDepth is the number of counter copies to store (think of it as rows). + cmDepth = 4 +) + +func newCmSketch(numCounters int64) *cmSketch { + if numCounters == 0 { + panic("cmSketch: bad numCounters") + } + // Get the next power of 2 for better cache performance. + numCounters = next2Power(numCounters) + sketch := &cmSketch{mask: uint64(numCounters - 1)} + // Initialize rows of counters and seeds. + // Cryptographic precision not needed + source := rand.New(rand.NewSource(time.Now().UnixNano())) //nolint:gosec + for i := 0; i < cmDepth; i++ { + sketch.seed[i] = source.Uint64() + sketch.rows[i] = newCmRow(numCounters) + } + return sketch +} + +// Increment increments the count(ers) for the specified key. +func (s *cmSketch) Increment(hashed uint64) { + for i := range s.rows { + s.rows[i].increment((hashed ^ s.seed[i]) & s.mask) + } +} + +// Estimate returns the value of the specified key. +func (s *cmSketch) Estimate(hashed uint64) int64 { + min := byte(255) + for i := range s.rows { + val := s.rows[i].get((hashed ^ s.seed[i]) & s.mask) + if val < min { + min = val + } + } + return int64(min) +} + +// Reset halves all counter values. +func (s *cmSketch) Reset() { + for _, r := range s.rows { + r.reset() + } +} + +// Clear zeroes all counters. +func (s *cmSketch) Clear() { + for _, r := range s.rows { + r.clear() + } +} + +// cmRow is a row of bytes, with each byte holding two counters. +type cmRow []byte + +func newCmRow(numCounters int64) cmRow { + return make(cmRow, numCounters/2) +} + +func (r cmRow) get(n uint64) byte { + return byte(r[n/2]>>((n&1)*4)) & 0x0f +} + +func (r cmRow) increment(n uint64) { + // Index of the counter. + i := n / 2 + // Shift distance (even 0, odd 4). + s := (n & 1) * 4 + // Counter value. 
+ v := (r[i] >> s) & 0x0f + // Only increment if not max value (overflow wrap is bad for LFU). + if v < 15 { + r[i] += 1 << s + } +} + +func (r cmRow) reset() { + // Halve each counter. + for i := range r { + r[i] = (r[i] >> 1) & 0x77 + } +} + +func (r cmRow) clear() { + // Zero each counter. + for i := range r { + r[i] = 0 + } +} + +func (r cmRow) string() string { + s := "" + for i := uint64(0); i < uint64(len(r)*2); i++ { + s += fmt.Sprintf("%02d ", (r[(i/2)]>>((i&1)*4))&0x0f) + } + s = s[:len(s)-1] + return s +} + +// next2Power rounds x up to the next power of 2, if it's not already one. +func next2Power(x int64) int64 { + x-- + x |= x >> 1 + x |= x >> 2 + x |= x >> 4 + x |= x >> 8 + x |= x >> 16 + x |= x >> 32 + x++ + return x +} diff --git a/vendor/github.com/dgraph-io/ristretto/store.go b/vendor/github.com/dgraph-io/ristretto/store.go new file mode 100644 index 0000000000..e42a98b787 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/store.go @@ -0,0 +1,242 @@ +/* + * Copyright 2019 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ristretto + +import ( + "sync" + "time" +) + +// TODO: Do we need this to be a separate struct from Item? +type storeItem struct { + key uint64 + conflict uint64 + value interface{} + expiration time.Time +} + +// store is the interface fulfilled by all hash map implementations in this +// file. 
Some hash map implementations are better suited for certain data +// distributions than others, so this allows us to abstract that out for use +// in Ristretto. +// +// Every store is safe for concurrent usage. +type store interface { + // Get returns the value associated with the key parameter. + Get(uint64, uint64) (interface{}, bool) + // Expiration returns the expiration time for this key. + Expiration(uint64) time.Time + // Set adds the key-value pair to the Map or updates the value if it's + // already present. The key-value pair is passed as a pointer to an + // item object. + Set(*Item) + // Del deletes the key-value pair from the Map. + Del(uint64, uint64) (uint64, interface{}) + // Update attempts to update the key with a new value and returns true if + // successful. + Update(*Item) (interface{}, bool) + // Cleanup removes items that have an expired TTL. + Cleanup(policy policy, onEvict itemCallback) + // Clear clears all contents of the store. + Clear(onEvict itemCallback) +} + +// newStore returns the default store implementation. +func newStore() store { + return newShardedMap() +} + +const numShards uint64 = 256 + +type shardedMap struct { + shards []*lockedMap + expiryMap *expirationMap +} + +func newShardedMap() *shardedMap { + sm := &shardedMap{ + shards: make([]*lockedMap, int(numShards)), + expiryMap: newExpirationMap(), + } + for i := range sm.shards { + sm.shards[i] = newLockedMap(sm.expiryMap) + } + return sm +} + +func (sm *shardedMap) Get(key, conflict uint64) (interface{}, bool) { + return sm.shards[key%numShards].get(key, conflict) +} + +func (sm *shardedMap) Expiration(key uint64) time.Time { + return sm.shards[key%numShards].Expiration(key) +} + +func (sm *shardedMap) Set(i *Item) { + if i == nil { + // If item is nil make this Set a no-op. 
+ return + } + + sm.shards[i.Key%numShards].Set(i) +} + +func (sm *shardedMap) Del(key, conflict uint64) (uint64, interface{}) { + return sm.shards[key%numShards].Del(key, conflict) +} + +func (sm *shardedMap) Update(newItem *Item) (interface{}, bool) { + return sm.shards[newItem.Key%numShards].Update(newItem) +} + +func (sm *shardedMap) Cleanup(policy policy, onEvict itemCallback) { + sm.expiryMap.cleanup(sm, policy, onEvict) +} + +func (sm *shardedMap) Clear(onEvict itemCallback) { + for i := uint64(0); i < numShards; i++ { + sm.shards[i].Clear(onEvict) + } +} + +type lockedMap struct { + sync.RWMutex + data map[uint64]storeItem + em *expirationMap +} + +func newLockedMap(em *expirationMap) *lockedMap { + return &lockedMap{ + data: make(map[uint64]storeItem), + em: em, + } +} + +func (m *lockedMap) get(key, conflict uint64) (interface{}, bool) { + m.RLock() + item, ok := m.data[key] + m.RUnlock() + if !ok { + return nil, false + } + if conflict != 0 && (conflict != item.conflict) { + return nil, false + } + + // Handle expired items. + if !item.expiration.IsZero() && time.Now().After(item.expiration) { + return nil, false + } + return item.value, true +} + +func (m *lockedMap) Expiration(key uint64) time.Time { + m.RLock() + defer m.RUnlock() + return m.data[key].expiration +} + +func (m *lockedMap) Set(i *Item) { + if i == nil { + // If the item is nil make this Set a no-op. + return + } + + m.Lock() + defer m.Unlock() + item, ok := m.data[i.Key] + + if ok { + // The item existed already. We need to check the conflict key and reject the + // update if they do not match. Only after that the expiration map is updated. + if i.Conflict != 0 && (i.Conflict != item.conflict) { + return + } + m.em.update(i.Key, i.Conflict, item.expiration, i.Expiration) + } else { + // The value is not in the map already. There's no need to return anything. + // Simply add the expiration map. 
+ m.em.add(i.Key, i.Conflict, i.Expiration) + } + + m.data[i.Key] = storeItem{ + key: i.Key, + conflict: i.Conflict, + value: i.Value, + expiration: i.Expiration, + } +} + +func (m *lockedMap) Del(key, conflict uint64) (uint64, interface{}) { + m.Lock() + item, ok := m.data[key] + if !ok { + m.Unlock() + return 0, nil + } + if conflict != 0 && (conflict != item.conflict) { + m.Unlock() + return 0, nil + } + + if !item.expiration.IsZero() { + m.em.del(key, item.expiration) + } + + delete(m.data, key) + m.Unlock() + return item.conflict, item.value +} + +func (m *lockedMap) Update(newItem *Item) (interface{}, bool) { + m.Lock() + item, ok := m.data[newItem.Key] + if !ok { + m.Unlock() + return nil, false + } + if newItem.Conflict != 0 && (newItem.Conflict != item.conflict) { + m.Unlock() + return nil, false + } + + m.em.update(newItem.Key, newItem.Conflict, item.expiration, newItem.Expiration) + m.data[newItem.Key] = storeItem{ + key: newItem.Key, + conflict: newItem.Conflict, + value: newItem.Value, + expiration: newItem.Expiration, + } + + m.Unlock() + return item.value, true +} + +func (m *lockedMap) Clear(onEvict itemCallback) { + m.Lock() + i := &Item{} + if onEvict != nil { + for _, si := range m.data { + i.Key = si.key + i.Conflict = si.conflict + i.Value = si.value + onEvict(i) + } + } + m.data = make(map[uint64]storeItem) + m.Unlock() +} diff --git a/vendor/github.com/dgraph-io/ristretto/ttl.go b/vendor/github.com/dgraph-io/ristretto/ttl.go new file mode 100644 index 0000000000..337976ad43 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/ttl.go @@ -0,0 +1,147 @@ +/* + * Copyright 2020 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ristretto + +import ( + "sync" + "time" +) + +var ( + // TODO: find the optimal value or make it configurable. + bucketDurationSecs = int64(5) +) + +func storageBucket(t time.Time) int64 { + return (t.Unix() / bucketDurationSecs) + 1 +} + +func cleanupBucket(t time.Time) int64 { + // The bucket to cleanup is always behind the storage bucket by one so that + // no elements in that bucket (which might not have expired yet) are deleted. + return storageBucket(t) - 1 +} + +// bucket type is a map of key to conflict. +type bucket map[uint64]uint64 + +// expirationMap is a map of bucket number to the corresponding bucket. +type expirationMap struct { + sync.RWMutex + buckets map[int64]bucket +} + +func newExpirationMap() *expirationMap { + return &expirationMap{ + buckets: make(map[int64]bucket), + } +} + +func (m *expirationMap) add(key, conflict uint64, expiration time.Time) { + if m == nil { + return + } + + // Items that don't expire don't need to be in the expiration map. 
+ if expiration.IsZero() { + return + } + + bucketNum := storageBucket(expiration) + m.Lock() + defer m.Unlock() + + b, ok := m.buckets[bucketNum] + if !ok { + b = make(bucket) + m.buckets[bucketNum] = b + } + b[key] = conflict +} + +// update moves key from the bucket of oldExpTime to the bucket of newExpTime. +// A zero time means the item does not expire: such items are never tracked in +// the expiration map (add skips them as well), so a zero oldExpTime has no +// bucket entry to remove and a zero newExpTime must not create one, otherwise +// the key would stay in a bucket that cleanup never visits. +func (m *expirationMap) update(key, conflict uint64, oldExpTime, newExpTime time.Time) { + if m == nil { + return + } + + m.Lock() + defer m.Unlock() + + if !oldExpTime.IsZero() { + if oldBucket, ok := m.buckets[storageBucket(oldExpTime)]; ok { + delete(oldBucket, key) + } + } + + // Items that don't expire don't need to be in the expiration map. + if newExpTime.IsZero() { + return + } + + newBucketNum := storageBucket(newExpTime) + newBucket, ok := m.buckets[newBucketNum] + if !ok { + newBucket = make(bucket) + m.buckets[newBucketNum] = newBucket + } + newBucket[key] = conflict +} + +// del removes key from the bucket that expiration maps to. It is a no-op when +// that bucket does not exist. +func (m *expirationMap) del(key uint64, expiration time.Time) { + if m == nil { + return + } + + bucketNum := storageBucket(expiration) + m.Lock() + defer m.Unlock() + _, ok := m.buckets[bucketNum] + if !ok { + return + } + delete(m.buckets[bucketNum], key) +} + +// cleanup removes all the items in the bucket that was just completed. It deletes +// those items from the store, and calls the onEvict function on those items. +// This function is meant to be called periodically. +func (m *expirationMap) cleanup(store store, policy policy, onEvict itemCallback) { + if m == nil { + return + } + + m.Lock() + now := time.Now() + bucketNum := cleanupBucket(now) + keys := m.buckets[bucketNum] + delete(m.buckets, bucketNum) + m.Unlock() + + for key, conflict := range keys { + // Sanity check. Verify that the store agrees that this key is expired.
+ if store.Expiration(key).After(now) { + continue + } + + cost := policy.Cost(key) + policy.Del(key) + _, value := store.Del(key, conflict) + + if onEvict != nil { + onEvict(&Item{Key: key, + Conflict: conflict, + Value: value, + Cost: cost, + }) + } + } +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/LICENSE b/vendor/github.com/dgraph-io/ristretto/z/LICENSE new file mode 100644 index 0000000000..0860cbfe85 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/LICENSE @@ -0,0 +1,64 @@ +bbloom.go + +// The MIT License (MIT) +// Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt + +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software is furnished to do so, +// subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +rtutil.go + +// MIT License + +// Copyright (c) 2019 Ewan Chou + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +Modifications: + +/* + * Copyright 2019 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + diff --git a/vendor/github.com/dgraph-io/ristretto/z/README.md b/vendor/github.com/dgraph-io/ristretto/z/README.md new file mode 100644 index 0000000000..6d77e146eb --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/README.md @@ -0,0 +1,129 @@ +## bbloom: a bitset Bloom filter for go/golang +=== + +This package implements a fast bloom filter with real 'bitset' and JSONMarshal/JSONUnmarshal to store/reload the Bloom filter. + +NOTE: the package uses unsafe.Pointer to set and read the bits from the bitset. If you're uncomfortable with using the unsafe package, please consider using my bloom filter package at github.com/AndreasBriese/bloom + +=== + +changelog 11/2015: new thread safe methods AddTS(), HasTS(), AddIfNotHasTS() following a suggestion from Srdjan Marinovic (github @a-little-srdjan), who used this to code a bloomfilter cache. + +This bloom filter was developed to strengthen a website-log database and was tested and optimized for this log-entry mask: "2014/%02i/%02i %02i:%02i:%02i /info.html". +Nonetheless bbloom should work with any other form of entries. + +~~Hash function is a modified Berkeley DB sdbm hash (to optimize for smaller strings). sdbm http://www.cse.yorku.ca/~oz/hash.html~~ + +Found sipHash (SipHash-2-4, a fast short-input PRF created by Jean-Philippe Aumasson and Daniel J. Bernstein.) to be about as fast. sipHash had been ported by Dmitry Chestnykh to Go (github.com/dchest/siphash ) + +Minimum hashset size is: 512 bits ([8]uint64; will be set automatically). + +### install + +```sh +go get github.com/AndreasBriese/bbloom +``` + +### test ++ change to folder ../bbloom ++ create wordlist in file "words.txt" (you might use `python permut.py`) ++ run 'go test -bench=.' within the folder + +```go +go test -bench=.
+``` + +~~If you've installed the GOCONVEY TDD-framework http://goconvey.co/ you can run the tests automatically.~~ + +using go's testing framework now (keep in mind that the op timing is related to 65536 operations of Add, Has, AddIfNotHas respectively) + +### usage + +after installation add + +```go +import ( + ... + "github.com/AndreasBriese/bbloom" + ... + ) +``` + +at your header. In the program use + +```go +// create a bloom filter for 65536 items and 1% false-positive ratio +bf := bbloom.New(float64(1<<16), float64(0.01)) + +// or +// create a bloom filter with 650000 bits for 65536 items and 7 locs per hash explicitly +// bf = bbloom.New(float64(650000), float64(7)) +// or +bf = bbloom.New(650000.0, 7.0) + +// add one item +bf.Add([]byte("butter")) + +// Number of elements added is exposed now +// Note: ElemNum will not be included in JSON export (for compatibility with older versions) +nOfElementsInFilter := bf.ElemNum + +// check if item is in the filter +isIn := bf.Has([]byte("butter")) // should be true +isNotIn := bf.Has([]byte("Butter")) // should be false + +// 'add only if item is new' to the bloomfilter +added := bf.AddIfNotHas([]byte("butter")) // should be false because 'butter' is already in the set +added = bf.AddIfNotHas([]byte("buTTer")) // should be true because 'buTTer' is new + +// thread safe versions for concurrent use: AddTS, HasTS, AddIfNotHasTS +// add one item +bf.AddTS([]byte("peanutbutter")) +// check if item is in the filter +isIn = bf.HasTS([]byte("peanutbutter")) // should be true +isNotIn = bf.HasTS([]byte("peanutButter")) // should be false +// 'add only if item is new' to the bloomfilter +added = bf.AddIfNotHasTS([]byte("butter")) // should be false because 'butter' is already in the set +added = bf.AddIfNotHasTS([]byte("peanutbuTTer")) // should be true because 'peanutbuTTer' is new + +// convert to JSON ([]byte) +Json := bf.JSONMarshal() + +// the bloomfilter's Mutex is exposed for external un-/locking +// i.e.
mutex lock while doing JSON conversion +bf.Mtx.Lock() +Json = bf.JSONMarshal() +bf.Mtx.Unlock() + +// restore a bloom filter from storage +bfNew := bbloom.JSONUnmarshal(Json) + +isInNew := bfNew.Has([]byte("butter")) // should be true +isNotInNew := bfNew.Has([]byte("Butter")) // should be false + +``` + +to work with the bloom filter. + +### why 'fast'? + +It's about 3 times faster than William Fitzgerald's bitset bloom filter https://github.com/willf/bloom . And it is about as fast as my []bool set variant for Bloom filters (see https://github.com/AndreasBriese/bloom ) but has an 8 times smaller memory footprint: + + + Bloom filter (filter size 524288, 7 hashlocs) + github.com/AndreasBriese/bbloom 'Add' 65536 items (10 repetitions): 6595800 ns (100 ns/op) + github.com/AndreasBriese/bbloom 'Has' 65536 items (10 repetitions): 5986600 ns (91 ns/op) + github.com/AndreasBriese/bloom 'Add' 65536 items (10 repetitions): 6304684 ns (96 ns/op) + github.com/AndreasBriese/bloom 'Has' 65536 items (10 repetitions): 6568663 ns (100 ns/op) + + github.com/willf/bloom 'Add' 65536 items (10 repetitions): 24367224 ns (371 ns/op) + github.com/willf/bloom 'Test' 65536 items (10 repetitions): 21881142 ns (333 ns/op) + github.com/dataence/bloom/standard 'Add' 65536 items (10 repetitions): 23041644 ns (351 ns/op) + github.com/dataence/bloom/standard 'Check' 65536 items (10 repetitions): 19153133 ns (292 ns/op) + github.com/cabello/bloom 'Add' 65536 items (10 repetitions): 131921507 ns (2012 ns/op) + github.com/cabello/bloom 'Contains' 65536 items (10 repetitions): 131108962 ns (2000 ns/op) + +(on MBPro15 OSX10.8.5 i7 4Core 2.4Ghz) + + +With 32bit bloom filters (bloom32) using modified sdbm, bloom32 does hashing with only 2 bit shifts, one xor and one subtraction per byte. sdbm is about as fast as fnv64a but gives fewer collisions with the dataset (see mask above).
bloom.New(float64(10 * 1<<16),float64(7)) populated with 1<<16 random items from the dataset (see above) and tested against the rest results in less than 0.05% collisions. diff --git a/vendor/github.com/dgraph-io/ristretto/z/allocator.go b/vendor/github.com/dgraph-io/ristretto/z/allocator.go new file mode 100644 index 0000000000..eae0f83449 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/allocator.go @@ -0,0 +1,403 @@ +/* + * Copyright 2020 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package z + +import ( + "bytes" + "fmt" + "math" + "math/bits" + "math/rand" + "strings" + "sync" + "sync/atomic" + "time" + "unsafe" + + "github.com/dustin/go-humanize" +) + +// Allocator amortizes the cost of small allocations by allocating memory in +// bigger chunks. Internally it uses z.Calloc to allocate memory. Once +// allocated, the memory is not moved, so it is safe to use the allocated bytes +// to unsafe cast them to Go struct pointers. Maintaining a freelist is slow. +// Instead, Allocator only allocates memory, with the idea that finally we +// would just release the entire Allocator. +type Allocator struct { + sync.Mutex + compIdx uint64 // Stores bufIdx in 32 MSBs and posIdx in 32 LSBs. + buffers [][]byte + Ref uint64 + Tag string +} + +// allocs keeps references to all Allocators, so we can safely discard them later. 
+var allocsMu *sync.Mutex +var allocRef uint64 +var allocs map[uint64]*Allocator +var calculatedLog2 []int + +func init() { + allocsMu = new(sync.Mutex) + allocs = make(map[uint64]*Allocator) + + // Set up a unique Ref per process. + rand.Seed(time.Now().UnixNano()) + allocRef = uint64(rand.Int63n(1<<16)) << 48 //nolint:gosec // cryptographic precision not needed + + calculatedLog2 = make([]int, 1025) + for i := 1; i <= 1024; i++ { + calculatedLog2[i] = int(math.Log2(float64(i))) + } +} + +// NewAllocator creates an allocator starting with the given size. +func NewAllocator(sz int, tag string) *Allocator { + ref := atomic.AddUint64(&allocRef, 1) + // We should not allow a zero sized page because addBufferWithMinSize + // will run into an infinite loop trying to double the pagesize. + if sz < 512 { + sz = 512 + } + a := &Allocator{ + Ref: ref, + buffers: make([][]byte, 64), + Tag: tag, + } + l2 := uint64(log2(sz)) + if bits.OnesCount64(uint64(sz)) > 1 { + l2 += 1 + } + a.buffers[0] = Calloc(1<> 32), int(pos & 0xFFFFFFFF) +} + +// Size returns the size of the allocations so far. +func (a *Allocator) Size() int { + pos := atomic.LoadUint64(&a.compIdx) + bi, pi := parse(pos) + var sz int + for i, b := range a.buffers { + if i < bi { + sz += len(b) + continue + } + sz += pi + return sz + } + panic("Size should not reach here") +} + +func log2(sz int) int { + if sz < len(calculatedLog2) { + return calculatedLog2[sz] + } + pow := 10 + sz >>= 10 + for sz > 1 { + sz >>= 1 + pow++ + } + return pow +} + +func (a *Allocator) Allocated() uint64 { + var alloc int + for _, b := range a.buffers { + alloc += cap(b) + } + return uint64(alloc) +} + +func (a *Allocator) TrimTo(max int) { + var alloc int + for i, b := range a.buffers { + if len(b) == 0 { + break + } + alloc += len(b) + if alloc < max { + continue + } + Free(b) + a.buffers[i] = nil + } +} + +// Release would release the memory back. Remember to make this call to avoid memory leaks. 
+func (a *Allocator) Release() { + if a == nil { + return + } + + var alloc int + for _, b := range a.buffers { + if len(b) == 0 { + break + } + alloc += len(b) + Free(b) + } + + allocsMu.Lock() + delete(allocs, a.Ref) + allocsMu.Unlock() +} + +const maxAlloc = 1 << 30 + +func (a *Allocator) MaxAlloc() int { + return maxAlloc +} + +const nodeAlign = unsafe.Sizeof(uint64(0)) - 1 + +func (a *Allocator) AllocateAligned(sz int) []byte { + tsz := sz + int(nodeAlign) + out := a.Allocate(tsz) + // We are reusing allocators. In that case, it's important to zero out the memory allocated + // here. We don't always zero it out (in Allocate), because other functions would be immediately + // overwriting the allocated slices anyway (see Copy). + ZeroOut(out, 0, len(out)) + + addr := uintptr(unsafe.Pointer(&out[0])) + aligned := (addr + nodeAlign) & ^nodeAlign + start := int(aligned - addr) + + return out[start : start+sz] +} + +func (a *Allocator) Copy(buf []byte) []byte { + if a == nil { + return append([]byte{}, buf...) + } + out := a.Allocate(len(buf)) + copy(out, buf) + return out +} + +func (a *Allocator) addBufferAt(bufIdx, minSz int) { + for { + if bufIdx >= len(a.buffers) { + panic(fmt.Sprintf("Allocator can not allocate more than %d buffers", len(a.buffers))) + } + if len(a.buffers[bufIdx]) == 0 { + break + } + if minSz <= len(a.buffers[bufIdx]) { + // No need to do anything. We already have a buffer which can satisfy minSz. + return + } + bufIdx++ + } + assert(bufIdx > 0) + // We need to allocate a new buffer. + // Make pageSize double of the last allocation. + pageSize := 2 * len(a.buffers[bufIdx-1]) + // Ensure pageSize is bigger than sz. + for pageSize < minSz { + pageSize *= 2 + } + // If bigger than maxAlloc, trim to maxAlloc. 
+ if pageSize > maxAlloc { + pageSize = maxAlloc + } + + buf := Calloc(pageSize, a.Tag) + assert(len(a.buffers[bufIdx]) == 0) + a.buffers[bufIdx] = buf +} + +func (a *Allocator) Allocate(sz int) []byte { + if a == nil { + return make([]byte, sz) + } + if sz > maxAlloc { + panic(fmt.Sprintf("Unable to allocate more than %d\n", maxAlloc)) + } + if sz == 0 { + return nil + } + for { + pos := atomic.AddUint64(&a.compIdx, uint64(sz)) + bufIdx, posIdx := parse(pos) + buf := a.buffers[bufIdx] + if posIdx > len(buf) { + a.Lock() + newPos := atomic.LoadUint64(&a.compIdx) + newBufIdx, _ := parse(newPos) + if newBufIdx != bufIdx { + a.Unlock() + continue + } + a.addBufferAt(bufIdx+1, sz) + atomic.StoreUint64(&a.compIdx, uint64((bufIdx+1)<<32)) + a.Unlock() + // We added a new buffer. Let's acquire slice the right way by going back to the top. + continue + } + data := buf[posIdx-sz : posIdx] + return data + } +} + +type AllocatorPool struct { + numGets int64 + allocCh chan *Allocator + closer *Closer +} + +func NewAllocatorPool(sz int) *AllocatorPool { + a := &AllocatorPool{ + allocCh: make(chan *Allocator, sz), + closer: NewCloser(1), + } + go a.freeupAllocators() + return a +} + +func (p *AllocatorPool) Get(sz int, tag string) *Allocator { + if p == nil { + return NewAllocator(sz, tag) + } + atomic.AddInt64(&p.numGets, 1) + select { + case alloc := <-p.allocCh: + alloc.Reset() + alloc.Tag = tag + return alloc + default: + return NewAllocator(sz, tag) + } +} +func (p *AllocatorPool) Return(a *Allocator) { + if a == nil { + return + } + if p == nil { + a.Release() + return + } + a.TrimTo(400 << 20) + + select { + case p.allocCh <- a: + return + default: + a.Release() + } +} + +func (p *AllocatorPool) Release() { + if p == nil { + return + } + p.closer.SignalAndWait() +} + +func (p *AllocatorPool) freeupAllocators() { + defer p.closer.Done() + + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + releaseOne := func() bool { + select { + case alloc := 
<-p.allocCh: + alloc.Release() + return true + default: + return false + } + } + + var last int64 + for { + select { + case <-p.closer.HasBeenClosed(): + close(p.allocCh) + for alloc := range p.allocCh { + alloc.Release() + } + return + + case <-ticker.C: + gets := atomic.LoadInt64(&p.numGets) + if gets != last { + // Some retrievals were made since the last time. So, let's avoid doing a release. + last = gets + continue + } + releaseOne() + } + } +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/bbloom.go b/vendor/github.com/dgraph-io/ristretto/z/bbloom.go new file mode 100644 index 0000000000..37135b012f --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/bbloom.go @@ -0,0 +1,211 @@ +// The MIT License (MIT) +// Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt + +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software is furnished to do so, +// subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +package z + +import ( + "bytes" + "encoding/json" + "math" + "unsafe" + + "github.com/golang/glog" +) + +// helper +var mask = []uint8{1, 2, 4, 8, 16, 32, 64, 128} + +func getSize(ui64 uint64) (size uint64, exponent uint64) { + if ui64 < uint64(512) { + ui64 = uint64(512) + } + size = uint64(1) + for size < ui64 { + size <<= 1 + exponent++ + } + return size, exponent +} + +func calcSizeByWrongPositives(numEntries, wrongs float64) (uint64, uint64) { + size := -1 * numEntries * math.Log(wrongs) / math.Pow(float64(0.69314718056), 2) + locs := math.Ceil(float64(0.69314718056) * size / numEntries) + return uint64(size), uint64(locs) +} + +// NewBloomFilter returns a new bloomfilter. +func NewBloomFilter(params ...float64) (bloomfilter *Bloom) { + var entries, locs uint64 + if len(params) == 2 { + if params[1] < 1 { + entries, locs = calcSizeByWrongPositives(params[0], params[1]) + } else { + entries, locs = uint64(params[0]), uint64(params[1]) + } + } else { + glog.Fatal("usage: New(float64(number_of_entries), float64(number_of_hashlocations))" + + " i.e. New(float64(1000), float64(3)) or New(float64(number_of_entries)," + + " float64(number_of_hashlocations)) i.e. 
New(float64(1000), float64(0.03))") + } + size, exponent := getSize(entries) + bloomfilter = &Bloom{ + sizeExp: exponent, + size: size - 1, + setLocs: locs, + shift: 64 - exponent, + } + bloomfilter.Size(size) + return bloomfilter +} + +// Bloom filter +type Bloom struct { + bitset []uint64 + ElemNum uint64 + sizeExp uint64 + size uint64 + setLocs uint64 + shift uint64 +} + +// <--- http://www.cse.yorku.ca/~oz/hash.html +// modified Berkeley DB Hash (32bit) +// hash is casted to l, h = 16bit fragments +// func (bl Bloom) absdbm(b *[]byte) (l, h uint64) { +// hash := uint64(len(*b)) +// for _, c := range *b { +// hash = uint64(c) + (hash << 6) + (hash << bl.sizeExp) - hash +// } +// h = hash >> bl.shift +// l = hash << bl.shift >> bl.shift +// return l, h +// } + +// Add adds hash of a key to the bloomfilter. +// It sets setLocs bit positions derived from hash and counts the call as one +// added element in ElemNum. +func (bl *Bloom) Add(hash uint64) { + h := hash >> bl.shift + l := hash << bl.shift >> bl.shift + for i := uint64(0); i < bl.setLocs; i++ { + bl.Set((h + i*l) & bl.size) + } + // ElemNum counts added elements, not set bits, so bump it once per Add + // rather than once per hash location. + bl.ElemNum++ +} + +// Has checks if bit(s) for entry hash is/are set, +// returns true if the hash was added to the Bloom Filter. +func (bl Bloom) Has(hash uint64) bool { + h := hash >> bl.shift + l := hash << bl.shift >> bl.shift + for i := uint64(0); i < bl.setLocs; i++ { + if !bl.IsSet((h + i*l) & bl.size) { + return false + } + } + return true +} + +// AddIfNotHas only Adds hash, if it's not present in the bloomfilter. +// Returns true if hash was added. +// Returns false if hash was already registered in the bloomfilter. +func (bl *Bloom) AddIfNotHas(hash uint64) bool { + if bl.Has(hash) { + return false + } + bl.Add(hash) + return true +} + +// TotalSize returns the total size of the bloom filter. +func (bl *Bloom) TotalSize() int { + // The bl struct has 5 members and each one is 8 byte. The bitset is a + // uint64 byte slice. + return len(bl.bitset)*8 + 5*8 +} + +// Size makes Bloom filter with a bitset of size sz.
+func (bl *Bloom) Size(sz uint64) { + bl.bitset = make([]uint64, sz>>6) +} + +// Clear resets the Bloom filter. +func (bl *Bloom) Clear() { + for i := range bl.bitset { + bl.bitset[i] = 0 + } +} + +// Set sets the bit[idx] of bitset. +func (bl *Bloom) Set(idx uint64) { + ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3)) + *(*uint8)(ptr) |= mask[idx%8] +} + +// IsSet checks if bit[idx] of bitset is set, returns true/false. +func (bl *Bloom) IsSet(idx uint64) bool { + ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3)) + r := ((*(*uint8)(ptr)) >> (idx % 8)) & 1 + return r == 1 +} + +// bloomJSONImExport +// Im/Export structure used by JSONMarshal / JSONUnmarshal +type bloomJSONImExport struct { + FilterSet []byte + SetLocs uint64 +} + +// NewWithBoolset takes a []byte slice and number of locs per entry, +// returns the bloomfilter with a bitset populated according to the input []byte. +func newWithBoolset(bs *[]byte, locs uint64) *Bloom { + bloomfilter := NewBloomFilter(float64(len(*bs)<<3), float64(locs)) + for i, b := range *bs { + *(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&bloomfilter.bitset[0])) + uintptr(i))) = b + } + return bloomfilter +} + +// JSONUnmarshal takes JSON-Object (type bloomJSONImExport) as []bytes +// returns bloom32 / bloom64 object. +func JSONUnmarshal(dbData []byte) (*Bloom, error) { + bloomImEx := bloomJSONImExport{} + if err := json.Unmarshal(dbData, &bloomImEx); err != nil { + return nil, err + } + buf := bytes.NewBuffer(bloomImEx.FilterSet) + bs := buf.Bytes() + bf := newWithBoolset(&bs, bloomImEx.SetLocs) + return bf, nil +} + +// JSONMarshal returns JSON-object (type bloomJSONImExport) as []byte. 
+func (bl Bloom) JSONMarshal() []byte { + bloomImEx := bloomJSONImExport{} + bloomImEx.SetLocs = bl.setLocs + bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3) + // Copy the bitset out byte by byte through an unsafe pointer to its first word. + for i := range bloomImEx.FilterSet { + bloomImEx.FilterSet[i] = *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[0])) + + uintptr(i))) + } + data, err := json.Marshal(bloomImEx) + if err != nil { + glog.Fatal("json.Marshal failed: ", err) + } + return data +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/btree.go b/vendor/github.com/dgraph-io/ristretto/z/btree.go new file mode 100644 index 0000000000..12b735bb03 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/btree.go @@ -0,0 +1,710 @@ +/* + * Copyright 2020 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package z + +import ( + "fmt" + "math" + "os" + "reflect" + "strings" + "unsafe" + + "github.com/dgraph-io/ristretto/z/simd" +) + +// pageSize is the OS page size; maxKeys and oneThird are derived from it. +var ( + pageSize = os.Getpagesize() + maxKeys = (pageSize / 16) - 1 + oneThird = int(float64(maxKeys) / 3) +) + +// absoluteMax is the largest key Set accepts (math.MaxUint64-1); minSize is the +// initial buffer size used when a tree's buffer is created or reset. +const ( + absoluteMax = uint64(math.MaxUint64 - 1) + minSize = 1 << 20 +) + +// Tree represents the structure for custom mmaped B+ tree. +// It supports keys in range [1, math.MaxUint64-1] and values [1, math.MaxUint64]. +type Tree struct { + buffer *Buffer + data []byte + nextPage uint64 + freePage uint64 + stats TreeStats +} + +func (t *Tree) initRootNode() { + // This is the root node.
+ t.newNode(0) + // This acts as the rightmost pointer (all the keys are <= this key). + t.Set(absoluteMax, 0) +} + +// NewTree returns an in-memory B+ tree. +// An empty tag defaults to "tree". +func NewTree(tag string) *Tree { + const defaultTag = "tree" + if tag == "" { + tag = defaultTag + } + t := &Tree{buffer: NewBuffer(minSize, tag)} + t.Reset() + return t +} + +// NewTreePersistent returns a persistent on-disk B+ tree. +// Its buffer is opened with NewBufferPersistent(path, minSize); if page 1 has no +// page ID yet a fresh root node is created, otherwise the in-memory state is +// rebuilt with reinit. +func NewTreePersistent(path string) (*Tree, error) { + t := &Tree{} + var err error + + // Open the buffer from disk and set it to the maximum allocated size. + t.buffer, err = NewBufferPersistent(path, minSize) + if err != nil { + return nil, err + } + t.buffer.offset = uint64(len(t.buffer.buf)) + t.data = t.buffer.Bytes() + + // pageID can never be 0 if the tree has been initialized. + root := t.node(1) + isInitialized := root.pageID() != 0 + + if !isInitialized { + t.nextPage = 1 + t.freePage = 0 + t.initRootNode() + } else { + t.reinit() + } + + return t, nil +} + +// reinit sets the internal variables of a Tree, which are normally stored +// in-memory, but are lost when loading from disk. +func (t *Tree) reinit() { + // Calculate t.nextPage by finding the first node whose pageID is not set. + t.nextPage = 1 + for int(t.nextPage)*pageSize < len(t.data) { + n := t.node(t.nextPage) + if n.pageID() == 0 { + break + } + t.nextPage++ + } + maxPageId := t.nextPage - 1 + + // Calculate t.freePage by finding the page to which no other page points. + // This would be the head of the page linked list. + // tailPages[i] is true if pageId i+1 is not the head of the list. + tailPages := make([]bool, maxPageId) + // Mark all pages containing nodes as tail pages. + t.Iterate(func(n node) { + i := n.pageID() - 1 + tailPages[i] = true + // If this is a leaf node, increment the stats. + if n.isLeaf() { + t.stats.NumLeafKeys += n.numKeys() + } + }) + // pointedPages is a list of page IDs that the non-tail (free) pages point to.
+ pointedPages := make([]uint64, 0) + for i, isTail := range tailPages { + if !isTail { + pageId := uint64(i) + 1 + // Skip if nextPageId = 0, as that is equivalent to null page. + if nextPageId := t.node(pageId).uint64(0); nextPageId != 0 { + pointedPages = append(pointedPages, nextPageId) + } + t.stats.NumPagesFree++ + } + } + + // Mark all pages being pointed to as tail pages. + for _, pageId := range pointedPages { + i := pageId - 1 + tailPages[i] = true + } + // There should only be one head page left. + for i, isTail := range tailPages { + if !isTail { + pageId := uint64(i) + 1 + t.freePage = pageId + break + } + } +} + +// Reset resets the tree and truncates it to maxSz. +func (t *Tree) Reset() { + // Tree relies on uninitialized data being zeroed out, so we need to Memclr + // the data before using it again. + Memclr(t.buffer.buf) + t.buffer.Reset() + t.buffer.AllocateOffset(minSize) + t.data = t.buffer.Bytes() + t.stats = TreeStats{} + t.nextPage = 1 + t.freePage = 0 + t.initRootNode() +} + +// Close releases the memory used by the tree. +func (t *Tree) Close() error { + if t == nil { + return nil + } + return t.buffer.Release() +} + +type TreeStats struct { + Allocated int // Derived. + Bytes int // Derived. + NumLeafKeys int // Calculated. + NumPages int // Derived. + NumPagesFree int // Calculated. + Occupancy float64 // Derived. + PageSize int // Derived. +} + +// Stats returns stats about the tree. +func (t *Tree) Stats() TreeStats { + numPages := int(t.nextPage - 1) + out := TreeStats{ + Bytes: numPages * pageSize, + Allocated: len(t.data), + NumLeafKeys: t.stats.NumLeafKeys, + NumPages: numPages, + NumPagesFree: t.stats.NumPagesFree, + PageSize: pageSize, + } + out.Occupancy = 100.0 * float64(out.NumLeafKeys) / float64(maxKeys*numPages) + return out +} + +// BytesToUint64Slice converts a byte slice to a uint64 slice. 
+func BytesToUint64Slice(b []byte) []uint64 { + if len(b) == 0 { + return nil + } + var u64s []uint64 + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&u64s)) + hdr.Len = len(b) / 8 + hdr.Cap = hdr.Len + hdr.Data = uintptr(unsafe.Pointer(&b[0])) + return u64s +} + +func (t *Tree) newNode(bit uint64) node { + var pageId uint64 + if t.freePage > 0 { + pageId = t.freePage + t.stats.NumPagesFree-- + } else { + pageId = t.nextPage + t.nextPage++ + offset := int(pageId) * pageSize + reqSize := offset + pageSize + if reqSize > len(t.data) { + t.buffer.AllocateOffset(reqSize - len(t.data)) + t.data = t.buffer.Bytes() + } + } + n := t.node(pageId) + if t.freePage > 0 { + t.freePage = n.uint64(0) + } + zeroOut(n) + n.setBit(bit) + n.setAt(keyOffset(maxKeys), pageId) + return n +} + +func getNode(data []byte) node { + return node(BytesToUint64Slice(data)) +} + +func zeroOut(data []uint64) { + for i := 0; i < len(data); i++ { + data[i] = 0 + } +} + +func (t *Tree) node(pid uint64) node { + // page does not exist + if pid == 0 { + return nil + } + start := pageSize * int(pid) + return getNode(t.data[start : start+pageSize]) +} + +// Set sets the key-value pair in the tree. +func (t *Tree) Set(k, v uint64) { + if k == math.MaxUint64 || k == 0 { + panic("Error setting zero or MaxUint64") + } + root := t.set(1, k, v) + if root.isFull() { + right := t.split(1) + left := t.newNode(root.bits()) + // Re-read the root as the underlying buffer for tree might have changed during split. + root = t.node(1) + copy(left[:keyOffset(maxKeys)], root) + left.setNumKeys(root.numKeys()) + + // reset the root node. + zeroOut(root[:keyOffset(maxKeys)]) + root.setNumKeys(0) + + // set the pointers for left and right child in the root node. + root.set(left.maxKey(), left.pageID()) + root.set(right.maxKey(), right.pageID()) + } +} + +// For internal nodes, they contain . +// where all entries <= key are stored in the corresponding ptr. 
+func (t *Tree) set(pid, k, v uint64) node { + n := t.node(pid) + if n.isLeaf() { + t.stats.NumLeafKeys += n.set(k, v) + return n + } + + // This is an internal node. + idx := n.search(k) + if idx >= maxKeys { + panic("search returned index >= maxKeys") + } + // If no key at idx. + if n.key(idx) == 0 { + n.setAt(keyOffset(idx), k) + n.setNumKeys(n.numKeys() + 1) + } + child := t.node(n.val(idx)) + if child == nil { + child = t.newNode(bitLeaf) + n = t.node(pid) + n.setAt(valOffset(idx), child.pageID()) + } + child = t.set(child.pageID(), k, v) + // Re-read n as the underlying buffer for tree might have changed during set. + n = t.node(pid) + if child.isFull() { + // Just consider the left sibling for simplicity. + // if t.shareWithSibling(n, idx) { + // return n + // } + + nn := t.split(child.pageID()) + // Re-read n and child as the underlying buffer for tree might have changed during split. + n = t.node(pid) + child = t.node(n.uint64(valOffset(idx))) + // Set child pointers in the node n. + // Note that key for right node (nn) already exist in node n, but the + // pointer is updated. + n.set(child.maxKey(), child.pageID()) + n.set(nn.maxKey(), nn.pageID()) + } + return n +} + +// Get looks for key and returns the corresponding value. +// If key is not found, 0 is returned. +func (t *Tree) Get(k uint64) uint64 { + if k == math.MaxUint64 || k == 0 { + panic("Does not support getting MaxUint64/Zero") + } + root := t.node(1) + return t.get(root, k) +} + +func (t *Tree) get(n node, k uint64) uint64 { + if n.isLeaf() { + return n.get(k) + } + // This is internal node + idx := n.search(k) + if idx == n.numKeys() || n.key(idx) == 0 { + return 0 + } + child := t.node(n.uint64(valOffset(idx))) + assert(child != nil) + return t.get(child, k) +} + +// DeleteBelow deletes all keys with value under ts. 
+func (t *Tree) DeleteBelow(ts uint64) { + root := t.node(1) + t.stats.NumLeafKeys = 0 + t.compact(root, ts) + assert(root.numKeys() >= 1) +} + +func (t *Tree) compact(n node, ts uint64) int { + if n.isLeaf() { + numKeys := n.compact(ts) + t.stats.NumLeafKeys += n.numKeys() + return numKeys + } + // Not leaf. + N := n.numKeys() + for i := 0; i < N; i++ { + assert(n.key(i) > 0) + childID := n.uint64(valOffset(i)) + child := t.node(childID) + if rem := t.compact(child, ts); rem == 0 && i < N-1 { + // If no valid key is remaining we can drop this child. However, don't do that if this + // is the max key. + t.stats.NumLeafKeys -= child.numKeys() + child.setAt(0, t.freePage) + t.freePage = childID + n.setAt(valOffset(i), 0) + t.stats.NumPagesFree++ + } + } + // We use ts=1 here because we want to delete all the keys whose value is 0, which means they no + // longer have a valid page for that key. + return n.compact(1) +} + +func (t *Tree) iterate(n node, fn func(node)) { + fn(n) + if n.isLeaf() { + return + } + // Explore children. + for i := 0; i < maxKeys; i++ { + if n.key(i) == 0 { + return + } + childID := n.uint64(valOffset(i)) + assert(childID > 0) + + child := t.node(childID) + t.iterate(child, fn) + } +} + +// Iterate iterates over the tree and executes the fn on each node. +func (t *Tree) Iterate(fn func(node)) { + root := t.node(1) + t.iterate(root, fn) +} + +// IterateKV iterates through all keys and values in the tree. +// If newVal is non-zero, it will be set in the tree. +func (t *Tree) IterateKV(f func(key, val uint64) (newVal uint64)) { + t.Iterate(func(n node) { + // Only leaf nodes contain keys. + if !n.isLeaf() { + return + } + + for i := 0; i < n.numKeys(); i++ { + key := n.key(i) + val := n.val(i) + + // A zero value here means that this is a bogus entry. 
+ if val == 0 { + continue + } + + newVal := f(key, val) + if newVal != 0 { + n.setAt(valOffset(i), newVal) + } + } + }) +} + +func (t *Tree) print(n node, parentID uint64) { + n.print(parentID) + if n.isLeaf() { + return + } + pid := n.pageID() + for i := 0; i < maxKeys; i++ { + if n.key(i) == 0 { + return + } + childID := n.uint64(valOffset(i)) + child := t.node(childID) + t.print(child, pid) + } +} + +// Print iterates over the tree and prints all valid KVs. +func (t *Tree) Print() { + root := t.node(1) + t.print(root, 0) +} + +// Splits the node into two. It moves right half of the keys from the original node to a newly +// created right node. It returns the right node. +func (t *Tree) split(pid uint64) node { + n := t.node(pid) + if !n.isFull() { + panic("This should be called only when n is full") + } + + // Create a new node nn, copy over half the keys from n, and set the parent to n's parent. + nn := t.newNode(n.bits()) + // Re-read n as the underlying buffer for tree might have changed during newNode. + n = t.node(pid) + rightHalf := n[keyOffset(maxKeys/2):keyOffset(maxKeys)] + copy(nn, rightHalf) + nn.setNumKeys(maxKeys - maxKeys/2) + + // Remove entries from node n. + zeroOut(rightHalf) + n.setNumKeys(maxKeys / 2) + return nn +} + +// shareWithSiblingXXX is unused for now. The idea is to move some keys to +// sibling when a node is full. But, I don't see any special benefits in our +// access pattern. It doesn't result in better occupancy ratios. +func (t *Tree) shareWithSiblingXXX(n node, idx int) bool { + if idx == 0 { + return false + } + left := t.node(n.val(idx - 1)) + ns := left.numKeys() + if ns >= maxKeys/2 { + // Sibling is already getting full. + return false + } + + right := t.node(n.val(idx)) + // Copy over keys from right child to left child. + copied := copy(left[keyOffset(ns):], right[:keyOffset(oneThird)]) + copied /= 2 // Considering that key-val constitute one key. 
+ left.setNumKeys(ns + copied) + + // Update the max key in parent node n for the left sibling. + n.setAt(keyOffset(idx-1), left.maxKey()) + + // Now move keys to left for the right sibling. + until := copy(right, right[keyOffset(oneThird):keyOffset(maxKeys)]) + right.setNumKeys(until / 2) + zeroOut(right[until:keyOffset(maxKeys)]) + return true +} + +// Each node in the node is of size pageSize. Two kinds of nodes. Leaf nodes and internal nodes. +// Leaf nodes only contain the data. Internal nodes would contain the key and the offset to the +// child node. +// Internal node would have first entry as +// <0 offset to child>, <1000 offset>, <5000 offset>, and so on... +// Leaf nodes would just have: , , and so on... +// Last 16 bytes of the node are off limits. +// | pageID (8 bytes) | metaBits (1 byte) | 3 free bytes | numKeys (4 bytes) | +type node []uint64 + +func (n node) uint64(start int) uint64 { return n[start] } + +// func (n node) uint32(start int) uint32 { return *(*uint32)(unsafe.Pointer(&n[start])) } + +func keyOffset(i int) int { return 2 * i } +func valOffset(i int) int { return 2*i + 1 } +func (n node) numKeys() int { return int(n.uint64(valOffset(maxKeys)) & 0xFFFFFFFF) } +func (n node) pageID() uint64 { return n.uint64(keyOffset(maxKeys)) } +func (n node) key(i int) uint64 { return n.uint64(keyOffset(i)) } +func (n node) val(i int) uint64 { return n.uint64(valOffset(i)) } +func (n node) data(i int) []uint64 { return n[keyOffset(i):keyOffset(i+1)] } + +func (n node) setAt(start int, k uint64) { + n[start] = k +} + +func (n node) setNumKeys(num int) { + idx := valOffset(maxKeys) + val := n[idx] + val &= 0xFFFFFFFF00000000 + val |= uint64(num) + n[idx] = val +} + +func (n node) moveRight(lo int) { + hi := n.numKeys() + assert(hi != maxKeys) + // copy works despite of overlap in src and dst. 
+ // See https://golang.org/pkg/builtin/#copy + copy(n[keyOffset(lo+1):keyOffset(hi+1)], n[keyOffset(lo):keyOffset(hi)]) +} + +const ( + bitLeaf = uint64(1 << 63) +) + +func (n node) setBit(b uint64) { + vo := valOffset(maxKeys) + val := n[vo] + val &= 0xFFFFFFFF + val |= b + n[vo] = val +} +func (n node) bits() uint64 { + return n.val(maxKeys) & 0xFF00000000000000 +} +func (n node) isLeaf() bool { + return n.bits()&bitLeaf > 0 +} + +// isFull checks that the node is already full. +func (n node) isFull() bool { + return n.numKeys() == maxKeys +} + +// Search returns the index of a smallest key >= k in a node. +func (n node) search(k uint64) int { + N := n.numKeys() + if N < 4 { + for i := 0; i < N; i++ { + if ki := n.key(i); ki >= k { + return i + } + } + return N + } + return int(simd.Search(n[:2*N], k)) + // lo, hi := 0, N + // // Reduce the search space using binary seach and then do linear search. + // for hi-lo > 32 { + // mid := (hi + lo) / 2 + // km := n.key(mid) + // if k == km { + // return mid + // } + // if k > km { + // // key is greater than the key at mid, so move right. + // lo = mid + 1 + // } else { + // // else move left. + // hi = mid + // } + // } + // for i := lo; i <= hi; i++ { + // if ki := n.key(i); ki >= k { + // return i + // } + // } + // return N +} +func (n node) maxKey() uint64 { + idx := n.numKeys() + // idx points to the first key which is zero. + if idx > 0 { + idx-- + } + return n.key(idx) +} + +// compacts the node i.e., remove all the kvs with value < lo. It returns the remaining number of +// keys. +func (n node) compact(lo uint64) int { + N := n.numKeys() + mk := n.maxKey() + var left, right int + for right = 0; right < N; right++ { + if n.val(right) < lo && n.key(right) < mk { + // Skip over this key. Don't copy it. + continue + } + // Valid data. Copy it from right to left. Advance left. + if left != right { + copy(n.data(left), n.data(right)) + } + left++ + } + // zero out rest of the kv pairs. 
+ zeroOut(n[keyOffset(left):keyOffset(right)]) + n.setNumKeys(left) + + // If the only key we have is the max key, and its value is less than lo, then we can indicate + // to the caller by returning a zero that it's OK to drop the node. + if left == 1 && n.key(0) == mk && n.val(0) < lo { + return 0 + } + return left +} + +func (n node) get(k uint64) uint64 { + idx := n.search(k) + // key is not found + if idx == n.numKeys() { + return 0 + } + if ki := n.key(idx); ki == k { + return n.val(idx) + } + return 0 +} + +// set returns true if it added a new key. +func (n node) set(k, v uint64) (numAdded int) { + idx := n.search(k) + ki := n.key(idx) + if n.numKeys() == maxKeys { + // This happens during split of non-root node, when we are updating the child pointer of + // right node. Hence, the key should already exist. + assert(ki == k) + } + if ki > k { + // Found the first entry which is greater than k. So, we need to fit k + // just before it. For that, we should move the rest of the data in the + // node to the right to make space for k. + n.moveRight(idx) + } + // If the k does not exist already, increment the number of keys. + if ki != k { + n.setNumKeys(n.numKeys() + 1) + numAdded = 1 + } + if ki == 0 || ki >= k { + n.setAt(keyOffset(idx), k) + n.setAt(valOffset(idx), v) + return + } + panic("shouldn't reach here") +} + +func (n node) iterate(fn func(node, int)) { + for i := 0; i < maxKeys; i++ { + if k := n.key(i); k > 0 { + fn(n, i) + } else { + break + } + } +} + +func (n node) print(parentID uint64) { + var keys []string + n.iterate(func(n node, i int) { + keys = append(keys, fmt.Sprintf("%d", n.key(i))) + }) + if len(keys) > 8 { + copy(keys[4:], keys[len(keys)-4:]) + keys[3] = "..." 
+ keys = keys[:8] + } + fmt.Printf("%d Child of: %d num keys: %d keys: %s\n", + n.pageID(), parentID, n.numKeys(), strings.Join(keys, " ")) +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/buffer.go b/vendor/github.com/dgraph-io/ristretto/z/buffer.go new file mode 100644 index 0000000000..5a22de8c7f --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/buffer.go @@ -0,0 +1,544 @@ +/* + * Copyright 2020 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package z + +import ( + "encoding/binary" + "fmt" + "io/ioutil" + "os" + "sort" + "sync/atomic" + + "github.com/golang/glog" + "github.com/pkg/errors" +) + +const ( + defaultCapacity = 64 + defaultTag = "buffer" +) + +// Buffer is equivalent of bytes.Buffer without the ability to read. It is NOT thread-safe. +// +// In UseCalloc mode, z.Calloc is used to allocate memory, which depending upon how the code is +// compiled could use jemalloc for allocations. +// +// In UseMmap mode, Buffer uses file mmap to allocate memory. This allows us to store big data +// structures without using physical memory. +// +// MaxSize can be set to limit the memory usage. 
+type Buffer struct { + padding uint64 // number of starting bytes used for padding + offset uint64 // used length of the buffer + buf []byte // backing slice for the buffer + bufType BufferType // type of the underlying buffer + curSz int // capacity of the buffer + maxSz int // causes a panic if the buffer grows beyond this size + mmapFile *MmapFile // optional mmap backing for the buffer + autoMmapAfter int // Calloc falls back to an mmaped tmpfile after crossing this size + autoMmapDir string // directory for autoMmap to create a tempfile in + persistent bool // when enabled, Release will not delete the underlying mmap file + tag string // used for jemalloc stats +} + +func NewBuffer(capacity int, tag string) *Buffer { + if capacity < defaultCapacity { + capacity = defaultCapacity + } + if tag == "" { + tag = defaultTag + } + return &Buffer{ + buf: Calloc(capacity, tag), + bufType: UseCalloc, + curSz: capacity, + offset: 8, + padding: 8, + tag: tag, + } +} + +// It is the caller's responsibility to set offset after this, because Buffer +// doesn't remember what it was. 
+func NewBufferPersistent(path string, capacity int) (*Buffer, error) { + file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) + if err != nil { + return nil, err + } + buffer, err := newBufferFile(file, capacity) + if err != nil { + return nil, err + } + buffer.persistent = true + return buffer, nil +} + +func NewBufferTmp(dir string, capacity int) (*Buffer, error) { + if dir == "" { + dir = tmpDir + } + file, err := ioutil.TempFile(dir, "buffer") + if err != nil { + return nil, err + } + return newBufferFile(file, capacity) +} + +func newBufferFile(file *os.File, capacity int) (*Buffer, error) { + if capacity < defaultCapacity { + capacity = defaultCapacity + } + mmapFile, err := OpenMmapFileUsing(file, capacity, true) + if err != nil && err != NewFile { + return nil, err + } + buf := &Buffer{ + buf: mmapFile.Data, + bufType: UseMmap, + curSz: len(mmapFile.Data), + mmapFile: mmapFile, + offset: 8, + padding: 8, + } + return buf, nil +} + +func NewBufferSlice(slice []byte) *Buffer { + return &Buffer{ + offset: uint64(len(slice)), + buf: slice, + bufType: UseInvalid, + } +} + +func (b *Buffer) WithAutoMmap(threshold int, path string) *Buffer { + if b.bufType != UseCalloc { + panic("can only autoMmap with UseCalloc") + } + b.autoMmapAfter = threshold + if path == "" { + b.autoMmapDir = tmpDir + } else { + b.autoMmapDir = path + } + return b +} + +func (b *Buffer) WithMaxSize(size int) *Buffer { + b.maxSz = size + return b +} + +func (b *Buffer) IsEmpty() bool { + return int(b.offset) == b.StartOffset() +} + +// LenWithPadding would return the number of bytes written to the buffer so far +// plus the padding at the start of the buffer. +func (b *Buffer) LenWithPadding() int { + return int(atomic.LoadUint64(&b.offset)) +} + +// LenNoPadding would return the number of bytes written to the buffer so far +// (without the padding). 
+func (b *Buffer) LenNoPadding() int { + return int(atomic.LoadUint64(&b.offset) - b.padding) +} + +// Bytes would return all the written bytes as a slice. +func (b *Buffer) Bytes() []byte { + off := atomic.LoadUint64(&b.offset) + return b.buf[b.padding:off] +} + +// Grow would grow the buffer to have at least n more bytes. In case the buffer is at capacity, it +// would reallocate twice the size of current capacity + n, to ensure n bytes can be written to the +// buffer without further allocation. In UseMmap mode, this might result in underlying file +// expansion. +func (b *Buffer) Grow(n int) { + if b.buf == nil { + panic("z.Buffer needs to be initialized before using") + } + if b.maxSz > 0 && int(b.offset)+n > b.maxSz { + err := fmt.Errorf( + "z.Buffer max size exceeded: %d offset: %d grow: %d", b.maxSz, b.offset, n) + panic(err) + } + if int(b.offset)+n < b.curSz { + return + } + + // Calculate new capacity. + growBy := b.curSz + n + // Don't allocate more than 1GB at a time. + if growBy > 1<<30 { + growBy = 1 << 30 + } + // Allocate at least n, even if it exceeds the 1GB limit above. + if n > growBy { + growBy = n + } + b.curSz += growBy + + switch b.bufType { + case UseCalloc: + // If autoMmap gets triggered, copy the slice over to an mmaped file. + if b.autoMmapAfter > 0 && b.curSz > b.autoMmapAfter { + b.bufType = UseMmap + file, err := ioutil.TempFile(b.autoMmapDir, "") + if err != nil { + panic(err) + } + mmapFile, err := OpenMmapFileUsing(file, b.curSz, true) + if err != nil && err != NewFile { + panic(err) + } + assert(int(b.offset) == copy(mmapFile.Data, b.buf[:b.offset])) + Free(b.buf) + b.mmapFile = mmapFile + b.buf = mmapFile.Data + break + } + + // Else, reallocate the slice. + newBuf := Calloc(b.curSz, b.tag) + assert(int(b.offset) == copy(newBuf, b.buf[:b.offset])) + Free(b.buf) + b.buf = newBuf + + case UseMmap: + // Truncate and remap the underlying file. 
+ if err := b.mmapFile.Truncate(int64(b.curSz)); err != nil { + err = errors.Wrapf(err, + "while trying to truncate file: %s to size: %d", b.mmapFile.Fd.Name(), b.curSz) + panic(err) + } + b.buf = b.mmapFile.Data + + default: + panic("can only use Grow on UseCalloc and UseMmap buffers") + } +} + +// Allocate is a way to get a slice of size n back from the buffer. This slice can be directly +// written to. Warning: Allocate is not thread-safe. The byte slice returned MUST be used before +// further calls to Buffer. +func (b *Buffer) Allocate(n int) []byte { + b.Grow(n) + off := b.offset + b.offset += uint64(n) + return b.buf[off:int(b.offset)] +} + +// AllocateOffset works the same way as allocate, but instead of returning a byte slice, it returns +// the offset of the allocation. +func (b *Buffer) AllocateOffset(n int) int { + b.Grow(n) + b.offset += uint64(n) + return int(b.offset) - n +} + +func (b *Buffer) writeLen(sz int) { + buf := b.Allocate(4) + binary.BigEndian.PutUint32(buf, uint32(sz)) +} + +// SliceAllocate would encode the size provided into the buffer, followed by a call to Allocate, +// hence returning the slice of size sz. This can be used to allocate a lot of small buffers into +// this big buffer. +// Note that SliceAllocate should NOT be mixed with normal calls to Write. 
+func (b *Buffer) SliceAllocate(sz int) []byte { + b.Grow(4 + sz) + b.writeLen(sz) + return b.Allocate(sz) +} + +func (b *Buffer) StartOffset() int { + return int(b.padding) +} + +func (b *Buffer) WriteSlice(slice []byte) { + dst := b.SliceAllocate(len(slice)) + assert(len(slice) == copy(dst, slice)) +} + +func (b *Buffer) SliceIterate(f func(slice []byte) error) error { + if b.IsEmpty() { + return nil + } + slice, next := []byte{}, b.StartOffset() + for next >= 0 { + slice, next = b.Slice(next) + if len(slice) == 0 { + continue + } + if err := f(slice); err != nil { + return err + } + } + return nil +} + +const ( + UseCalloc BufferType = iota + UseMmap + UseInvalid +) + +type BufferType int + +func (t BufferType) String() string { + switch t { + case UseCalloc: + return "UseCalloc" + case UseMmap: + return "UseMmap" + default: + return "UseInvalid" + } +} + +type LessFunc func(a, b []byte) bool +type sortHelper struct { + offsets []int + b *Buffer + tmp *Buffer + less LessFunc + small []int +} + +func (s *sortHelper) sortSmall(start, end int) { + s.tmp.Reset() + s.small = s.small[:0] + next := start + for next >= 0 && next < end { + s.small = append(s.small, next) + _, next = s.b.Slice(next) + } + + // We are sorting the slices pointed to by s.small offsets, but only moving the offsets around. + sort.Slice(s.small, func(i, j int) bool { + left, _ := s.b.Slice(s.small[i]) + right, _ := s.b.Slice(s.small[j]) + return s.less(left, right) + }) + // Now we iterate over the s.small offsets and copy over the slices. The result is now in order. 
+ for _, off := range s.small { + s.tmp.Write(rawSlice(s.b.buf[off:])) + } + assert(end-start == copy(s.b.buf[start:end], s.tmp.Bytes())) +} + +func assert(b bool) { + if !b { + glog.Fatalf("%+v", errors.Errorf("Assertion failure")) + } +} +func check(err error) { + if err != nil { + glog.Fatalf("%+v", err) + } +} +func check2(_ interface{}, err error) { + check(err) +} + +func (s *sortHelper) merge(left, right []byte, start, end int) { + if len(left) == 0 || len(right) == 0 { + return + } + s.tmp.Reset() + check2(s.tmp.Write(left)) + left = s.tmp.Bytes() + + var ls, rs []byte + + copyLeft := func() { + assert(len(ls) == copy(s.b.buf[start:], ls)) + left = left[len(ls):] + start += len(ls) + } + copyRight := func() { + assert(len(rs) == copy(s.b.buf[start:], rs)) + right = right[len(rs):] + start += len(rs) + } + + for start < end { + if len(left) == 0 { + assert(len(right) == copy(s.b.buf[start:end], right)) + return + } + if len(right) == 0 { + assert(len(left) == copy(s.b.buf[start:end], left)) + return + } + ls = rawSlice(left) + rs = rawSlice(right) + + // We skip the first 4 bytes in the rawSlice, because that stores the length. + if s.less(ls[4:], rs[4:]) { + copyLeft() + } else { + copyRight() + } + } +} + +func (s *sortHelper) sort(lo, hi int) []byte { + assert(lo <= hi) + + mid := lo + (hi-lo)/2 + loff, hoff := s.offsets[lo], s.offsets[hi] + if lo == mid { + // No need to sort, just return the buffer. + return s.b.buf[loff:hoff] + } + + // lo, mid would sort from [offset[lo], offset[mid]) . + left := s.sort(lo, mid) + // Typically we'd use mid+1, but here mid represents an offset in the buffer. Each offset + // contains a thousand entries. So, if we do mid+1, we'd skip over those entries. + right := s.sort(mid, hi) + + s.merge(left, right, loff, hoff) + return s.b.buf[loff:hoff] +} + +// SortSlice is like SortSliceBetween but sorting over the entire buffer. 
+func (b *Buffer) SortSlice(less func(left, right []byte) bool) { + b.SortSliceBetween(b.StartOffset(), int(b.offset), less) +} +func (b *Buffer) SortSliceBetween(start, end int, less LessFunc) { + if start >= end { + return + } + if start == 0 { + panic("start can never be zero") + } + + var offsets []int + next, count := start, 0 + for next >= 0 && next < end { + if count%1024 == 0 { + offsets = append(offsets, next) + } + _, next = b.Slice(next) + count++ + } + assert(len(offsets) > 0) + if offsets[len(offsets)-1] != end { + offsets = append(offsets, end) + } + + szTmp := int(float64((end-start)/2) * 1.1) + s := &sortHelper{ + offsets: offsets, + b: b, + less: less, + small: make([]int, 0, 1024), + tmp: NewBuffer(szTmp, b.tag), + } + defer s.tmp.Release() + + left := offsets[0] + for _, off := range offsets[1:] { + s.sortSmall(left, off) + left = off + } + s.sort(0, len(offsets)-1) +} + +func rawSlice(buf []byte) []byte { + sz := binary.BigEndian.Uint32(buf) + return buf[:4+int(sz)] +} + +// Slice would return the slice written at offset. +func (b *Buffer) Slice(offset int) ([]byte, int) { + if offset >= int(b.offset) { + return nil, -1 + } + + sz := binary.BigEndian.Uint32(b.buf[offset:]) + start := offset + 4 + next := start + int(sz) + res := b.buf[start:next] + if next >= int(b.offset) { + next = -1 + } + return res, next +} + +// SliceOffsets is an expensive function. Use sparingly. +func (b *Buffer) SliceOffsets() []int { + next := b.StartOffset() + var offsets []int + for next >= 0 { + offsets = append(offsets, next) + _, next = b.Slice(next) + } + return offsets +} + +func (b *Buffer) Data(offset int) []byte { + if offset > b.curSz { + panic("offset beyond current size") + } + return b.buf[offset:b.curSz] +} + +// Write would write p bytes to the buffer. 
+func (b *Buffer) Write(p []byte) (n int, err error) { + n = len(p) + b.Grow(n) + assert(n == copy(b.buf[b.offset:], p)) + b.offset += uint64(n) + return n, nil +} + +// Reset would reset the buffer to be reused. +func (b *Buffer) Reset() { + b.offset = uint64(b.StartOffset()) +} + +// Release would free up the memory allocated by the buffer. Once the usage of buffer is done, it is +// important to call Release, otherwise a memory leak can happen. +func (b *Buffer) Release() error { + if b == nil { + return nil + } + switch b.bufType { + case UseCalloc: + Free(b.buf) + case UseMmap: + if b.mmapFile == nil { + return nil + } + path := b.mmapFile.Fd.Name() + if err := b.mmapFile.Close(-1); err != nil { + return errors.Wrapf(err, "while closing file: %s", path) + } + if !b.persistent { + if err := os.Remove(path); err != nil { + return errors.Wrapf(err, "while deleting file %s", path) + } + } + } + return nil +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/calloc.go b/vendor/github.com/dgraph-io/ristretto/z/calloc.go new file mode 100644 index 0000000000..2e5d613813 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/calloc.go @@ -0,0 +1,42 @@ +package z + +import "sync/atomic" + +var numBytes int64 + +// NumAllocBytes returns the number of bytes allocated using calls to z.Calloc. The allocations +// could be happening via either Go or jemalloc, depending upon the build flags. +func NumAllocBytes() int64 { + return atomic.LoadInt64(&numBytes) +} + +// MemStats is used to fetch JE Malloc Stats. The stats are fetched from +// the mallctl namespace http://jemalloc.net/jemalloc.3.html#mallctl_namespace. +type MemStats struct { + // Total number of bytes allocated by the application. + // http://jemalloc.net/jemalloc.3.html#stats.allocated + Allocated uint64 + // Total number of bytes in active pages allocated by the application. This + // is a multiple of the page size, and greater than or equal to + // Allocated. 
+ // http://jemalloc.net/jemalloc.3.html#stats.active + Active uint64 + // Maximum number of bytes in physically resident data pages mapped by the + // allocator, comprising all pages dedicated to allocator metadata, pages + // backing active allocations, and unused dirty pages. This is a maximum + // rather than precise because pages may not actually be physically + // resident if they correspond to demand-zeroed virtual memory that has not + // yet been touched. This is a multiple of the page size, and is larger + // than stats.active. + // http://jemalloc.net/jemalloc.3.html#stats.resident + Resident uint64 + // Total number of bytes in virtual memory mappings that were retained + // rather than being returned to the operating system via e.g. munmap(2) or + // similar. Retained virtual memory is typically untouched, decommitted, or + // purged, so it has no strongly associated physical memory (see extent + // hooks http://jemalloc.net/jemalloc.3.html#arena.i.extent_hooks for + // details). Retained memory is excluded from mapped memory statistics, + // e.g. stats.mapped (http://jemalloc.net/jemalloc.3.html#stats.mapped). + // http://jemalloc.net/jemalloc.3.html#stats.retained + Retained uint64 +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/calloc_32bit.go b/vendor/github.com/dgraph-io/ristretto/z/calloc_32bit.go new file mode 100644 index 0000000000..3a0442614f --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/calloc_32bit.go @@ -0,0 +1,14 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +// +build 386 amd64p32 arm armbe mips mipsle mips64p32 mips64p32le ppc sparc + +package z + +const ( + // MaxArrayLen is a safe maximum length for slices on this architecture. + MaxArrayLen = 1<<31 - 1 + // MaxBufferSize is the size of virtually unlimited buffer on this architecture. 
+ MaxBufferSize = 1 << 30 +) diff --git a/vendor/github.com/dgraph-io/ristretto/z/calloc_64bit.go b/vendor/github.com/dgraph-io/ristretto/z/calloc_64bit.go new file mode 100644 index 0000000000..b898248bba --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/calloc_64bit.go @@ -0,0 +1,14 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +// +build amd64 arm64 arm64be ppc64 ppc64le mips64 mips64le riscv64 s390x sparc64 + +package z + +const ( + // MaxArrayLen is a safe maximum length for slices on this architecture. + MaxArrayLen = 1<<50 - 1 + // MaxBufferSize is the size of virtually unlimited buffer on this architecture. + MaxBufferSize = 256 << 30 +) diff --git a/vendor/github.com/dgraph-io/ristretto/z/calloc_jemalloc.go b/vendor/github.com/dgraph-io/ristretto/z/calloc_jemalloc.go new file mode 100644 index 0000000000..904d73ac57 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/calloc_jemalloc.go @@ -0,0 +1,172 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +// +build jemalloc + +package z + +/* +#cgo LDFLAGS: /usr/local/lib/libjemalloc.a -L/usr/local/lib -Wl,-rpath,/usr/local/lib -ljemalloc -lm -lstdc++ -pthread -ldl +#include +#include +*/ +import "C" +import ( + "bytes" + "fmt" + "sync" + "sync/atomic" + "unsafe" + + "github.com/dustin/go-humanize" +) + +// The go:linkname directives provides backdoor access to private functions in +// the runtime. Below we're accessing the throw function. + +//go:linkname throw runtime.throw +func throw(s string) + +// New allocates a slice of size n. The returned slice is from manually managed +// memory and MUST be released by calling Free. Failure to do so will result in +// a memory leak. 
// dalloc records one allocation handed out by Calloc: the tag supplied by the
// caller and the number of bytes that were requested.
type dalloc struct {
	t  string // tag passed to Calloc
	sz int    // size in bytes passed to Calloc
}
+ return (*[MaxArrayLen]byte)(uptr)[:n:n] +} + +// CallocNoRef does the exact same thing as Calloc with jemalloc enabled. +func CallocNoRef(n int, tag string) []byte { + return Calloc(n, tag) +} + +// Free frees the specified slice. +func Free(b []byte) { + if sz := cap(b); sz != 0 { + b = b[:cap(b)] + ptr := unsafe.Pointer(&b[0]) + C.je_free(ptr) + atomic.AddInt64(&numBytes, -int64(sz)) + dallocsMu.Lock() + delete(dallocs, ptr) + dallocsMu.Unlock() + } +} + +func Leaks() string { + if dallocs == nil { + return "Leak detection disabled. Enable with 'leak' build flag." + } + dallocsMu.Lock() + defer dallocsMu.Unlock() + if len(dallocs) == 0 { + return "NO leaks found." + } + m := make(map[string]int) + for _, da := range dallocs { + m[da.t] += da.sz + } + var buf bytes.Buffer + fmt.Fprintf(&buf, "Allocations:\n") + for f, sz := range m { + fmt.Fprintf(&buf, "%s at file: %s\n", humanize.IBytes(uint64(sz)), f) + } + return buf.String() +} + +// ReadMemStats populates stats with JE Malloc statistics. +func ReadMemStats(stats *MemStats) { + if stats == nil { + return + } + // Call an epoch mallclt to refresh the stats data as mentioned in the docs. + // http://jemalloc.net/jemalloc.3.html#epoch + // Note: This epoch mallctl is as expensive as a malloc call. It takes up the + // malloc_mutex_lock. + epoch := 1 + sz := unsafe.Sizeof(&epoch) + C.je_mallctl( + (C.CString)("epoch"), + unsafe.Pointer(&epoch), + (*C.size_t)(unsafe.Pointer(&sz)), + unsafe.Pointer(&epoch), + (C.size_t)(unsafe.Sizeof(epoch))) + stats.Allocated = fetchStat("stats.allocated") + stats.Active = fetchStat("stats.active") + stats.Resident = fetchStat("stats.resident") + stats.Retained = fetchStat("stats.retained") +} + +// fetchStat is used to read a specific attribute from je malloc stats using mallctl. +func fetchStat(s string) uint64 { + var out uint64 + sz := unsafe.Sizeof(&out) + C.je_mallctl( + (C.CString)(s), // Query: eg: stats.allocated, stats.resident, etc. 
// CallocNoRef will not give you memory back without jemalloc: in this build it
// ignores both arguments and always returns nil.
func CallocNoRef(n int, tag string) []byte {
	// Nothing is allocated or counted here; the result is always nil.
	return nil
}
// OpenMmapFileUsing memory-maps the already opened file fd and returns it
// wrapped in an MmapFile.
//
// When the file is empty and sz > 0, the file is first truncated to sz bytes
// and the NewFile sentinel is returned as the error together with a usable
// *MmapFile, so callers must not treat NewFile as a failure. Every other
// non-nil error is returned with a nil *MmapFile. fd is never closed here.
func OpenMmapFileUsing(fd *os.File, sz int, writable bool) (*MmapFile, error) {
	filename := fd.Name()
	fi, err := fd.Stat()
	if err != nil {
		return nil, errors.Wrapf(err, "cannot stat file: %s", filename)
	}

	// rerr stays nil unless the file had to be sized below.
	var rerr error
	fileSize := fi.Size()
	if sz > 0 && fileSize == 0 {
		// If file is empty, truncate it to sz.
		if err := fd.Truncate(int64(sz)); err != nil {
			return nil, errors.Wrapf(err, "error while truncation")
		}
		fileSize = int64(sz)
		rerr = NewFile
	}

	// fmt.Printf("Mmaping file: %s with writable: %v filesize: %d\n", fd.Name(), writable, fileSize)
	buf, err := Mmap(fd, writable, fileSize) // Mmap up to file size.
	if err != nil {
		return nil, errors.Wrapf(err, "while mmapping %s with size: %d", fd.Name(), fileSize)
	}

	// NOTE(review): fileSize was already raised to sz above for a new file, so
	// this branch only runs for an empty file with sz <= 0 - confirm that is
	// the intent. SyncDir runs in its own goroutine and its error is dropped.
	if fileSize == 0 {
		dir, _ := filepath.Split(filename)
		go SyncDir(dir)
	}
	return &MmapFile{
		Data: buf,
		Fd:   fd,
	}, rerr
}
In both cases, it would mmap +// the file to maxSz and returned it. In case the file is created, z.NewFile is +// returned. +func OpenMmapFile(filename string, flag int, maxSz int) (*MmapFile, error) { + // fmt.Printf("opening file %s with flag: %v\n", filename, flag) + fd, err := os.OpenFile(filename, flag, 0666) + if err != nil { + return nil, errors.Wrapf(err, "unable to open: %s", filename) + } + writable := true + if flag == os.O_RDONLY { + writable = false + } + return OpenMmapFileUsing(fd, maxSz, writable) +} + +type mmapReader struct { + Data []byte + offset int +} + +func (mr *mmapReader) Read(buf []byte) (int, error) { + if mr.offset > len(mr.Data) { + return 0, io.EOF + } + n := copy(buf, mr.Data[mr.offset:]) + mr.offset += n + if n < len(buf) { + return n, io.EOF + } + return n, nil +} + +func (m *MmapFile) NewReader(offset int) io.Reader { + return &mmapReader{ + Data: m.Data, + offset: offset, + } +} + +// Bytes returns data starting from offset off of size sz. If there's not enough data, it would +// return nil slice and io.EOF. +func (m *MmapFile) Bytes(off, sz int) ([]byte, error) { + if len(m.Data[off:]) < sz { + return nil, io.EOF + } + return m.Data[off : off+sz], nil +} + +// Slice returns the slice at the given offset. +func (m *MmapFile) Slice(offset int) []byte { + sz := binary.BigEndian.Uint32(m.Data[offset:]) + start := offset + 4 + next := start + int(sz) + if next > len(m.Data) { + return []byte{} + } + res := m.Data[start:next] + return res +} + +// AllocateSlice allocates a slice of the given size at the given offset. +func (m *MmapFile) AllocateSlice(sz, offset int) ([]byte, int, error) { + start := offset + 4 + + // If the file is too small, double its size or increase it by 1GB, whichever is smaller. 
+ if start+sz > len(m.Data) { + const oneGB = 1 << 30 + growBy := len(m.Data) + if growBy > oneGB { + growBy = oneGB + } + if growBy < sz+4 { + growBy = sz + 4 + } + if err := m.Truncate(int64(len(m.Data) + growBy)); err != nil { + return nil, 0, err + } + } + + binary.BigEndian.PutUint32(m.Data[offset:], uint32(sz)) + return m.Data[start : start+sz], start + sz, nil +} + +func (m *MmapFile) Sync() error { + if m == nil { + return nil + } + return Msync(m.Data) +} + +func (m *MmapFile) Delete() error { + // Badger can set the m.Data directly, without setting any Fd. In that case, this should be a + // NOOP. + if m.Fd == nil { + return nil + } + + if err := Munmap(m.Data); err != nil { + return fmt.Errorf("while munmap file: %s, error: %v\n", m.Fd.Name(), err) + } + m.Data = nil + if err := m.Fd.Truncate(0); err != nil { + return fmt.Errorf("while truncate file: %s, error: %v\n", m.Fd.Name(), err) + } + if err := m.Fd.Close(); err != nil { + return fmt.Errorf("while close file: %s, error: %v\n", m.Fd.Name(), err) + } + return os.Remove(m.Fd.Name()) +} + +// Close would close the file. It would also truncate the file if maxSz >= 0. +func (m *MmapFile) Close(maxSz int64) error { + // Badger can set the m.Data directly, without setting any Fd. In that case, this should be a + // NOOP. 
+ if m.Fd == nil { + return nil + } + if err := m.Sync(); err != nil { + return fmt.Errorf("while sync file: %s, error: %v\n", m.Fd.Name(), err) + } + if err := Munmap(m.Data); err != nil { + return fmt.Errorf("while munmap file: %s, error: %v\n", m.Fd.Name(), err) + } + if maxSz >= 0 { + if err := m.Fd.Truncate(maxSz); err != nil { + return fmt.Errorf("while truncate file: %s, error: %v\n", m.Fd.Name(), err) + } + } + return m.Fd.Close() +} + +func SyncDir(dir string) error { + df, err := os.Open(dir) + if err != nil { + return errors.Wrapf(err, "while opening %s", dir) + } + if err := df.Sync(); err != nil { + return errors.Wrapf(err, "while syncing %s", dir) + } + if err := df.Close(); err != nil { + return errors.Wrapf(err, "while closing %s", dir) + } + return nil +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/file_default.go b/vendor/github.com/dgraph-io/ristretto/z/file_default.go new file mode 100644 index 0000000000..d9c0db43e7 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/file_default.go @@ -0,0 +1,39 @@ +// +build !linux + +/* + * Copyright 2020 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package z + +import "fmt" + +// Truncate would truncate the mmapped file to the given size. On Linux, we truncate +// the underlying file and then call mremap, but on other systems, we unmap first, +// then truncate, then re-map. 
+func (m *MmapFile) Truncate(maxSz int64) error { + if err := m.Sync(); err != nil { + return fmt.Errorf("while sync file: %s, error: %v\n", m.Fd.Name(), err) + } + if err := Munmap(m.Data); err != nil { + return fmt.Errorf("while munmap file: %s, error: %v\n", m.Fd.Name(), err) + } + if err := m.Fd.Truncate(maxSz); err != nil { + return fmt.Errorf("while truncate file: %s, error: %v\n", m.Fd.Name(), err) + } + var err error + m.Data, err = Mmap(m.Fd, true, maxSz) // Mmap up to max size. + return err +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/file_linux.go b/vendor/github.com/dgraph-io/ristretto/z/file_linux.go new file mode 100644 index 0000000000..7f670bd2cc --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/file_linux.go @@ -0,0 +1,37 @@ +/* + * Copyright 2020 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package z + +import ( + "fmt" +) + +// Truncate would truncate the mmapped file to the given size. On Linux, we truncate +// the underlying file and then call mremap, but on other systems, we unmap first, +// then truncate, then re-map. 
+func (m *MmapFile) Truncate(maxSz int64) error { + if err := m.Sync(); err != nil { + return fmt.Errorf("while sync file: %s, error: %v\n", m.Fd.Name(), err) + } + if err := m.Fd.Truncate(maxSz); err != nil { + return fmt.Errorf("while truncate file: %s, error: %v\n", m.Fd.Name(), err) + } + + var err error + m.Data, err = mremap(m.Data, int(maxSz)) // Mmap up to max size. + return err +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/flags.go b/vendor/github.com/dgraph-io/ristretto/z/flags.go new file mode 100644 index 0000000000..a55c474ab2 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/flags.go @@ -0,0 +1,311 @@ +package z + +import ( + "fmt" + "os" + "os/user" + "path/filepath" + "sort" + "strconv" + "strings" + "time" + + "github.com/golang/glog" + "github.com/pkg/errors" +) + +// SuperFlagHelp makes it really easy to generate command line `--help` output for a SuperFlag. For +// example: +// +// const flagDefaults = `enabled=true; path=some/path;` +// +// var help string = z.NewSuperFlagHelp(flagDefaults). +// Flag("enabled", "Turns on ."). +// Flag("path", "The path to ."). +// Flag("another", "Not present in defaults, but still included."). +// String() +// +// The `help` string would then contain: +// +// enabled=true; Turns on . +// path=some/path; The path to . +// another=; Not present in defaults, but still included. +// +// All flags are sorted alphabetically for consistent `--help` output. Flags with default values are +// placed at the top, and everything else goes under. 
+type SuperFlagHelp struct { + head string + defaults *SuperFlag + flags map[string]string +} + +func NewSuperFlagHelp(defaults string) *SuperFlagHelp { + return &SuperFlagHelp{ + defaults: NewSuperFlag(defaults), + flags: make(map[string]string, 0), + } +} + +func (h *SuperFlagHelp) Head(head string) *SuperFlagHelp { + h.head = head + return h +} + +func (h *SuperFlagHelp) Flag(name, description string) *SuperFlagHelp { + h.flags[name] = description + return h +} + +func (h *SuperFlagHelp) String() string { + defaultLines := make([]string, 0) + otherLines := make([]string, 0) + for name, help := range h.flags { + val, found := h.defaults.m[name] + line := fmt.Sprintf(" %s=%s; %s\n", name, val, help) + if found { + defaultLines = append(defaultLines, line) + } else { + otherLines = append(otherLines, line) + } + } + sort.Strings(defaultLines) + sort.Strings(otherLines) + dls := strings.Join(defaultLines, "") + ols := strings.Join(otherLines, "") + if len(h.defaults.m) == 0 && len(ols) == 0 { + // remove last newline + dls = dls[:len(dls)-1] + } + // remove last newline + if len(h.defaults.m) == 0 && len(ols) > 1 { + ols = ols[:len(ols)-1] + } + return h.head + "\n" + dls + ols +} + +func parseFlag(flag string) (map[string]string, error) { + kvm := make(map[string]string) + for _, kv := range strings.Split(flag, ";") { + if strings.TrimSpace(kv) == "" { + continue + } + // For a non-empty separator, 0 < len(splits) ≤ 2. 
+ splits := strings.SplitN(kv, "=", 2) + k := strings.TrimSpace(splits[0]) + if len(splits) < 2 { + return nil, fmt.Errorf("superflag: missing value for '%s' in flag: %s", k, flag) + } + k = strings.ToLower(k) + k = strings.ReplaceAll(k, "_", "-") + kvm[k] = strings.TrimSpace(splits[1]) + } + return kvm, nil +} + +type SuperFlag struct { + m map[string]string +} + +func NewSuperFlag(flag string) *SuperFlag { + sf, err := newSuperFlagImpl(flag) + if err != nil { + glog.Fatal(err) + } + return sf +} + +func newSuperFlagImpl(flag string) (*SuperFlag, error) { + m, err := parseFlag(flag) + if err != nil { + return nil, err + } + return &SuperFlag{m}, nil +} + +func (sf *SuperFlag) String() string { + if sf == nil { + return "" + } + kvs := make([]string, 0, len(sf.m)) + for k, v := range sf.m { + kvs = append(kvs, fmt.Sprintf("%s=%s", k, v)) + } + return strings.Join(kvs, "; ") +} + +func (sf *SuperFlag) MergeAndCheckDefault(flag string) *SuperFlag { + sf, err := sf.mergeAndCheckDefaultImpl(flag) + if err != nil { + glog.Fatal(err) + } + return sf +} + +func (sf *SuperFlag) mergeAndCheckDefaultImpl(flag string) (*SuperFlag, error) { + if sf == nil { + m, err := parseFlag(flag) + if err != nil { + return nil, err + } + return &SuperFlag{m}, nil + } + + src, err := parseFlag(flag) + if err != nil { + return nil, err + } + + numKeys := len(sf.m) + for k := range src { + if _, ok := sf.m[k]; ok { + numKeys-- + } + } + if numKeys != 0 { + return nil, fmt.Errorf("superflag: found invalid options in flag: %s.\nvalid options: %v", sf, flag) + } + for k, v := range src { + if _, ok := sf.m[k]; !ok { + sf.m[k] = v + } + } + return sf, nil +} + +func (sf *SuperFlag) Has(opt string) bool { + val := sf.GetString(opt) + return val != "" +} + +func (sf *SuperFlag) GetDuration(opt string) time.Duration { + val := sf.GetString(opt) + if val == "" { + return time.Duration(0) + } + if strings.Contains(val, "d") { + val = strings.Replace(val, "d", "", 1) + days, err := 
// GetBool returns the value stored under opt parsed with strconv.ParseBool.
// An empty or missing value yields false. A value that cannot be parsed is
// reported through glog.Fatalf together with the key and the full flag.
func (sf *SuperFlag) GetBool(opt string) bool {
	val := sf.GetString(opt)
	if val == "" {
		return false
	}
	b, err := strconv.ParseBool(val)
	if err != nil {
		// Attach the offending value, key and all options before logging fatally.
		err = errors.Wrapf(err,
			"Unable to parse %s as bool for key: %s. Options: %s\n",
			val, opt, sf)
		glog.Fatalf("%+v", err)
	}
	return b
}
Options: %s\n", + val, opt, sf) + glog.Fatalf("%+v", err) + } + return uint32(u) +} + +func (sf *SuperFlag) GetString(opt string) string { + if sf == nil { + return "" + } + return sf.m[opt] +} + +func (sf *SuperFlag) GetPath(opt string) string { + p := sf.GetString(opt) + path, err := expandPath(p) + if err != nil { + glog.Fatalf("Failed to get path: %+v", err) + } + return path +} + +// expandPath expands the paths containing ~ to /home/user. It also computes the absolute path +// from the relative paths. For example: ~/abc/../cef will be transformed to /home/user/cef. +func expandPath(path string) (string, error) { + if len(path) == 0 { + return "", nil + } + if path[0] == '~' && (len(path) == 1 || os.IsPathSeparator(path[1])) { + usr, err := user.Current() + if err != nil { + return "", errors.Wrap(err, "Failed to get the home directory of the user") + } + path = filepath.Join(usr.HomeDir, path[1:]) + } + + var err error + path, err = filepath.Abs(path) + if err != nil { + return "", errors.Wrap(err, "Failed to generate absolute path") + } + return path, nil +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/histogram.go b/vendor/github.com/dgraph-io/ristretto/z/histogram.go new file mode 100644 index 0000000000..4eb0c4f6c9 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/histogram.go @@ -0,0 +1,205 @@ +/* + * Copyright 2020 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package z + +import ( + "fmt" + "math" + "strings" + + "github.com/dustin/go-humanize" +) + +// Creates bounds for an histogram. The bounds are powers of two of the form +// [2^min_exponent, ..., 2^max_exponent]. +func HistogramBounds(minExponent, maxExponent uint32) []float64 { + var bounds []float64 + for i := minExponent; i <= maxExponent; i++ { + bounds = append(bounds, float64(int(1)< 4) + bounds := make([]float64, num) + bounds[0] = 1 + bounds[1] = 2 + for i := 2; i < num; i++ { + bounds[i] = bounds[i-1] + bounds[i-2] + } + return bounds +} + +// HistogramData stores the information needed to represent the sizes of the keys and values +// as a histogram. +type HistogramData struct { + Bounds []float64 + Count int64 + CountPerBucket []int64 + Min int64 + Max int64 + Sum int64 +} + +// NewHistogramData returns a new instance of HistogramData with properly initialized fields. +func NewHistogramData(bounds []float64) *HistogramData { + return &HistogramData{ + Bounds: bounds, + CountPerBucket: make([]int64, len(bounds)+1), + Max: 0, + Min: math.MaxInt64, + } +} + +func (histogram *HistogramData) Copy() *HistogramData { + if histogram == nil { + return nil + } + return &HistogramData{ + Bounds: append([]float64{}, histogram.Bounds...), + CountPerBucket: append([]int64{}, histogram.CountPerBucket...), + Count: histogram.Count, + Min: histogram.Min, + Max: histogram.Max, + Sum: histogram.Sum, + } +} + +// Update changes the Min and Max fields if value is less than or greater than the current values. +func (histogram *HistogramData) Update(value int64) { + if histogram == nil { + return + } + if value > histogram.Max { + histogram.Max = value + } + if value < histogram.Min { + histogram.Min = value + } + + histogram.Sum += value + histogram.Count++ + + for index := 0; index <= len(histogram.Bounds); index++ { + // Allocate value in the last buckets if we reached the end of the Bounds array. 
+ if index == len(histogram.Bounds) { + histogram.CountPerBucket[index]++ + break + } + + if value < int64(histogram.Bounds[index]) { + histogram.CountPerBucket[index]++ + break + } + } +} + +// Mean returns the mean value for the histogram. +func (histogram *HistogramData) Mean() float64 { + if histogram.Count == 0 { + return 0 + } + return float64(histogram.Sum) / float64(histogram.Count) +} + +// String converts the histogram data into human-readable string. +func (histogram *HistogramData) String() string { + if histogram == nil { + return "" + } + var b strings.Builder + + b.WriteString("\n -- Histogram: \n") + b.WriteString(fmt.Sprintf("Min value: %d \n", histogram.Min)) + b.WriteString(fmt.Sprintf("Max value: %d \n", histogram.Max)) + b.WriteString(fmt.Sprintf("Count: %d \n", histogram.Count)) + b.WriteString(fmt.Sprintf("50p: %.2f \n", histogram.Percentile(0.5))) + b.WriteString(fmt.Sprintf("75p: %.2f \n", histogram.Percentile(0.75))) + b.WriteString(fmt.Sprintf("90p: %.2f \n", histogram.Percentile(0.90))) + + numBounds := len(histogram.Bounds) + var cum float64 + for index, count := range histogram.CountPerBucket { + if count == 0 { + continue + } + + // The last bucket represents the bucket that contains the range from + // the last bound up to infinity so it's processed differently than the + // other buckets. 
+ if index == len(histogram.CountPerBucket)-1 { + lowerBound := uint64(histogram.Bounds[numBounds-1]) + page := float64(count*100) / float64(histogram.Count) + cum += page + b.WriteString(fmt.Sprintf("[%s, %s) %d %.2f%% %.2f%%\n", + humanize.IBytes(lowerBound), "infinity", count, page, cum)) + continue + } + + upperBound := uint64(histogram.Bounds[index]) + lowerBound := uint64(0) + if index > 0 { + lowerBound = uint64(histogram.Bounds[index-1]) + } + + page := float64(count*100) / float64(histogram.Count) + cum += page + b.WriteString(fmt.Sprintf("[%d, %d) %d %.2f%% %.2f%%\n", + lowerBound, upperBound, count, page, cum)) + } + b.WriteString(" --\n") + return b.String() +} + +// Percentile returns the percentile value for the histogram. +// value of p should be between [0.0-1.0] +func (histogram *HistogramData) Percentile(p float64) float64 { + if histogram == nil { + return 0 + } + + if histogram.Count == 0 { + // if no data return the minimum range + return histogram.Bounds[0] + } + pval := int64(float64(histogram.Count) * p) + for i, v := range histogram.CountPerBucket { + pval = pval - v + if pval <= 0 { + if i == len(histogram.Bounds) { + break + } + return histogram.Bounds[i] + } + } + // default return should be the max range + return histogram.Bounds[len(histogram.Bounds)-1] +} + +// Clear reset the histogram. Helpful in situations where we need to reset the metrics +func (histogram *HistogramData) Clear() { + if histogram == nil { + return + } + + histogram.Count = 0 + histogram.CountPerBucket = make([]int64, len(histogram.Bounds)+1) + histogram.Sum = 0 + histogram.Max = 0 + histogram.Min = math.MaxInt64 +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/mmap.go b/vendor/github.com/dgraph-io/ristretto/z/mmap.go new file mode 100644 index 0000000000..9b02510003 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/mmap.go @@ -0,0 +1,44 @@ +/* + * Copyright 2019 Dgraph Labs, Inc. 
// Madvise gives advice about the expected use of memory when working with a
// slice that is memory-mapped to a file. Set the readahead flag to false if
// page references are expected in random order. The call is forwarded
// unchanged to the platform-specific madvise implementation.
func Madvise(b []byte, readahead bool) error {
	return madvise(b, readahead)
}
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package z + +import ( + "os" + "syscall" + "unsafe" + + "golang.org/x/sys/unix" +) + +// Mmap uses the mmap system call to memory-map a file. If writable is true, +// memory protection of the pages is set so that they may be written to as well. +func mmap(fd *os.File, writable bool, size int64) ([]byte, error) { + mtype := unix.PROT_READ + if writable { + mtype |= unix.PROT_WRITE + } + return unix.Mmap(int(fd.Fd()), 0, int(size), mtype, unix.MAP_SHARED) +} + +// Munmap unmaps a previously mapped slice. +func munmap(b []byte) error { + return unix.Munmap(b) +} + +// This is required because the unix package does not support the madvise system call on OS X. +func madvise(b []byte, readahead bool) error { + advice := unix.MADV_NORMAL + if !readahead { + advice = unix.MADV_RANDOM + } + + _, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), + uintptr(len(b)), uintptr(advice)) + if e1 != 0 { + return e1 + } + return nil +} + +func msync(b []byte) error { + return unix.Msync(b, unix.MS_SYNC) +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/mmap_linux.go b/vendor/github.com/dgraph-io/ristretto/z/mmap_linux.go new file mode 100644 index 0000000000..331330cff9 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/mmap_linux.go @@ -0,0 +1,71 @@ +/* + * Copyright 2020 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
// mmap uses the mmap system call to memory-map a file. If writable is true,
// memory protection of the pages is set so that they may be written to as well.
// The mapping is shared (MAP_SHARED), starts at offset 0 and covers size bytes.
func mmap(fd *os.File, writable bool, size int64) ([]byte, error) {
	mtype := unix.PROT_READ
	if writable {
		mtype |= unix.PROT_WRITE
	}
	return unix.Mmap(int(fd.Fd()), 0, int(size), mtype, unix.MAP_SHARED)
}

// munmap unmaps a previously mapped slice.
//
// unix.Munmap maintains an internal list of mmapped addresses, and only calls munmap
// if the address is present in that list. If we use mremap, this list is not updated.
// To bypass this, we call munmap ourselves.
//
// It returns EINVAL for an empty slice or for a slice whose length differs
// from its capacity.
func munmap(data []byte) error {
	if len(data) == 0 || len(data) != cap(data) {
		return unix.EINVAL
	}
	// The pointer conversion stays inside the call expression, as required for
	// passing unsafe.Pointer values to a syscall.
	_, _, errno := unix.Syscall(
		unix.SYS_MUNMAP,
		uintptr(unsafe.Pointer(&data[0])),
		uintptr(len(data)),
		0,
	)
	if errno != 0 {
		return errno
	}
	return nil
}

// madvise uses the madvise system call to give advice about the use of memory
// when using a slice that is memory-mapped to a file. Set the readahead flag to
// false if page references are expected in random order.
func madvise(b []byte, readahead bool) error {
	flags := unix.MADV_NORMAL
	if !readahead {
		flags = unix.MADV_RANDOM
	}
	return unix.Madvise(b, flags)
}

// msync writes any modified data to persistent storage.
func msync(b []byte) error {
	// MS_SYNC is passed to unix.Msync for the whole slice.
	return unix.Msync(b, unix.MS_SYNC)
}

// The declarations below belong to z/mmap_plan9.go.

// mmap is the Plan 9 stand-in for the memory-mapping helper. It maps nothing
// and always returns syscall.EPLAN9.
func mmap(fd *os.File, writable bool, size int64) ([]byte, error) {
	return nil, syscall.EPLAN9
}

// munmap is the Plan 9 stand-in for the unmapping helper. It always returns
// syscall.EPLAN9.
func munmap(b []byte) error {
	return syscall.EPLAN9
}

// madvise is the Plan 9 stand-in for the madvise helper (advice about the use
// of memory when using a slice that is memory-mapped to a file). It always
// returns syscall.EPLAN9.
func madvise(b []byte, readahead bool) error {
	// Plan 9 variant: both arguments are ignored.
	return syscall.EPLAN9
}

// msync is the Plan 9 stand-in for the msync helper. It always returns
// syscall.EPLAN9.
func msync(b []byte) error {
	return syscall.EPLAN9
}

// The declarations below belong to z/mmap_unix.go
// (build constraint: !windows,!darwin,!plan9,!linux).

// mmap uses the mmap system call to memory-map a file. If writable is true,
// memory protection of the pages is set so that they may be written to as well.
// The mapping is shared (MAP_SHARED), starts at offset 0 and covers size bytes.
func mmap(fd *os.File, writable bool, size int64) ([]byte, error) {
	mtype := unix.PROT_READ
	if writable {
		mtype |= unix.PROT_WRITE
	}
	return unix.Mmap(int(fd.Fd()), 0, int(size), mtype, unix.MAP_SHARED)
}

// munmap unmaps a previously mapped slice.
func munmap(b []byte) error {
	return unix.Munmap(b)
}

// madvise uses the madvise system call to give advice about the use of memory
// when using a slice that is memory-mapped to a file. Set the readahead flag to
// false if page references are expected in random order.
+func madvise(b []byte, readahead bool) error { + flags := unix.MADV_NORMAL + if !readahead { + flags = unix.MADV_RANDOM + } + return unix.Madvise(b, flags) +} + +func msync(b []byte) error { + return unix.Msync(b, unix.MS_SYNC) +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/mmap_windows.go b/vendor/github.com/dgraph-io/ristretto/z/mmap_windows.go new file mode 100644 index 0000000000..171176e9fe --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/mmap_windows.go @@ -0,0 +1,96 @@ +// +build windows + +/* + * Copyright 2019 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package z + +import ( + "fmt" + "os" + "syscall" + "unsafe" +) + +func mmap(fd *os.File, write bool, size int64) ([]byte, error) { + protect := syscall.PAGE_READONLY + access := syscall.FILE_MAP_READ + + if write { + protect = syscall.PAGE_READWRITE + access = syscall.FILE_MAP_WRITE + } + fi, err := fd.Stat() + if err != nil { + return nil, err + } + + // In windows, we cannot mmap a file more than it's actual size. + // So truncate the file to the size of the mmap. + if fi.Size() < size { + if err := fd.Truncate(size); err != nil { + return nil, fmt.Errorf("truncate: %s", err) + } + } + + // Open a file mapping handle. 
+ sizelo := uint32(size >> 32) + sizehi := uint32(size) & 0xffffffff + + handler, err := syscall.CreateFileMapping(syscall.Handle(fd.Fd()), nil, + uint32(protect), sizelo, sizehi, nil) + if err != nil { + return nil, os.NewSyscallError("CreateFileMapping", err) + } + + // Create the memory map. + addr, err := syscall.MapViewOfFile(handler, uint32(access), 0, 0, uintptr(size)) + if addr == 0 { + return nil, os.NewSyscallError("MapViewOfFile", err) + } + + // Close mapping handle. + if err := syscall.CloseHandle(syscall.Handle(handler)); err != nil { + return nil, os.NewSyscallError("CloseHandle", err) + } + + // Slice memory layout + // Copied this snippet from golang/sys package + var sl = struct { + addr uintptr + len int + cap int + }{addr, int(size), int(size)} + + // Use unsafe to turn sl into a []byte. + data := *(*[]byte)(unsafe.Pointer(&sl)) + + return data, nil +} + +func munmap(b []byte) error { + return syscall.UnmapViewOfFile(uintptr(unsafe.Pointer(&b[0]))) +} + +func madvise(b []byte, readahead bool) error { + // Do Nothing. We don’t care about this setting on Windows + return nil +} + +func msync(b []byte) error { + // TODO: Figure out how to do msync on Windows. + return nil +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/mremap_linux.go b/vendor/github.com/dgraph-io/ristretto/z/mremap_linux.go new file mode 100644 index 0000000000..225678658d --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/mremap_linux.go @@ -0,0 +1,56 @@ +// +build !arm64 + +/* + * Copyright 2020 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
// mremap is a Linux-specific system call to remap pages in memory. This can be used in place of munmap + mmap.
//
// It asks the kernel to resize the mapping behind data to size bytes and
// returns a slice over the resulting mapping with len and cap both equal to
// size. A syscall failure is returned as the errno; a result size different
// from the requested one is reported as an error.
func mremap(data []byte, size int) ([]byte, error) {
	//nolint:lll
	// Value of the MREMAP_MAYMOVE flag (the upstream reference link is missing from this copy).
	const MREMAP_MAYMOVE = 0x1

	// View the local slice header so the current address and length can be
	// read and then overwritten with the new mapping below.
	header := (*reflect.SliceHeader)(unsafe.Pointer(&data))
	mmapAddr, mmapSize, errno := unix.Syscall6(
		unix.SYS_MREMAP,
		header.Data,
		uintptr(header.Len),
		uintptr(size),
		uintptr(MREMAP_MAYMOVE),
		0,
		0,
	)
	if errno != 0 {
		return nil, errno
	}
	if mmapSize != uintptr(size) {
		return nil, fmt.Errorf("mremap size mismatch: requested: %d got: %d", size, mmapSize)
	}

	// Point the slice at the address returned by the kernel, which replaces
	// the previous one.
	header.Data = mmapAddr
	header.Cap = size
	header.Len = size
	return data, nil
}
// mremap is a Linux-specific system call to remap pages in memory. This can be used in place of munmap + mmap.
//
// This is the arm64 variant: it asks the kernel to resize the mapping behind
// data to size bytes and returns a slice over the resulting mapping with len
// and cap both equal to size. Unlike the other Linux variant it does not check
// the second syscall result.
func mremap(data []byte, size int) ([]byte, error) {
	//nolint:lll
	// Value of the MREMAP_MAYMOVE flag (the upstream reference link is missing from this copy).
	const MREMAP_MAYMOVE = 0x1

	// View the local slice header so the current address and length can be
	// read and then overwritten with the new mapping below.
	header := (*reflect.SliceHeader)(unsafe.Pointer(&data))
	// For ARM64, the second return argument for SYS_MREMAP is inconsistent (prior allocated size) with
	// other architectures, which return the size allocated
	mmapAddr, _, errno := unix.Syscall6(
		unix.SYS_MREMAP,
		header.Data,
		uintptr(header.Len),
		uintptr(size),
		uintptr(MREMAP_MAYMOVE),
		0,
		0,
	)
	if errno != 0 {
		return nil, errno
	}

	// Point the slice at the address returned by the kernel, which replaces
	// the previous one.
	header.Data = mmapAddr
	header.Cap = size
	header.Len = size
	return data, nil
}
// NanoTime returns the current time in nanoseconds from a monotonic clock.
// It has no body here; the linkname directive binds it to runtime.nanotime.
//go:linkname NanoTime runtime.nanotime
func NanoTime() int64

// CPUTicks is a faster alternative to NanoTime to measure time duration.
// It has no body here; the linkname directive binds it to runtime.cputicks.
//go:linkname CPUTicks runtime.cputicks
func CPUTicks() int64

// stringStruct is a (data pointer, length) pair. MemHash and MemHashString
// overlay it on a []byte and on a string respectively to reach the raw bytes
// without copying.
type stringStruct struct {
	str unsafe.Pointer
	len int
}

// memhash is bound to runtime.memhash by the linkname directive. It is called
// below with a data pointer, a seed of 0 and a byte count.
//go:noescape
//go:linkname memhash runtime.memhash
func memhash(p unsafe.Pointer, h, s uintptr) uintptr

// MemHash is the hash function used by go map, it utilizes available hardware instructions(behaves
// as aeshash if aes instruction is available).
// NOTE: The hash seed changes for every process. So, this cannot be used as a persistent hash.
func MemHash(data []byte) uint64 {
	// Only the pointer and length words of the slice are read through the overlay.
	ss := (*stringStruct)(unsafe.Pointer(&data))
	return uint64(memhash(ss.str, 0, uintptr(ss.len)))
}

// MemHashString is the hash function used by go map, it utilizes available hardware instructions
// (behaves as aeshash if aes instruction is available).
// NOTE: The hash seed changes for every process. So, this cannot be used as a persistent hash.
func MemHashString(str string) uint64 {
	ss := (*stringStruct)(unsafe.Pointer(&str))
	return uint64(memhash(ss.str, 0, uintptr(ss.len)))
}

// FastRand is a fast thread local random function.
//go:linkname FastRand runtime.fastrand
func FastRand() uint32

// memclrNoHeapPointers is bound to the runtime routine of the same name by the
// linkname directive; Memclr below is its only caller here.
//go:linkname memclrNoHeapPointers runtime.memclrNoHeapPointers
func memclrNoHeapPointers(p unsafe.Pointer, n uintptr)

// Memclr zeroes every byte of b. An empty slice is left untouched, because
// there is no first element to take the address of.
func Memclr(b []byte) {
	if len(b) == 0 {
		return
	}
	p := unsafe.Pointer(&b[0])
	memclrNoHeapPointers(p, uintptr(len(b)))
}

// The declarations below belong to z/simd/baseline.go.
//
// All search helpers treat xs as a flat list of pairs whose first element
// (even index) is the key. They return the pair index of the first key that is
// >= k, or the number of pairs when there is no such key.

// Naive finds the key by scanning every even index in order.
func Naive(xs []uint64, k uint64) int16 {
	var i int
	for i = 0; i < len(xs); i += 2 {
		x := xs[i]
		if x >= k {
			return int16(i / 2)
		}
	}
	return int16(i / 2)
}

// Clever is Naive unrolled to examine four keys (eight slots) per iteration.
// The unrolled loop reads xs[i+2], xs[i+4] and xs[i+6], so it only runs when
// len(xs) is a non-zero multiple of 8; any other length falls back to Naive
// instead of indexing past the end of the slice.
func Clever(xs []uint64, k uint64) int16 {
	if len(xs) < 8 || len(xs)%8 != 0 {
		return Naive(xs, k)
	}
	for i := 0; i < len(xs); i += 8 {
		switch {
		case xs[i] >= k:
			return int16(i / 2)
		case xs[i+2] >= k:
			return int16((i + 2) / 2)
		case xs[i+4] >= k:
			return int16((i + 4) / 2)
		case xs[i+6] >= k:
			return int16((i + 6) / 2)
		}
	}
	return int16(len(xs) / 2)
}

// Parallel splits xs into at most runtime.NumCPU() chunks, scans each chunk in
// its own goroutine and returns the smallest matching pair index.
func Parallel(xs []uint64, k uint64) int16 {
	cpus := runtime.NumCPU()

	// Every chunk must begin on an even index, otherwise its stride-2 scan
	// would compare the second element of each pair instead of the key. Round
	// the chunk size up to an even number; this works for any CPU count.
	sz := len(xs)/cpus + 1
	if sz%2 != 0 {
		sz++
	}

	var wg sync.WaitGroup
	// sz > len(xs)/cpus, so there are never more than cpus chunks and the
	// buffered channel can take one result per chunk without blocking.
	retChan := make(chan int16, cpus)
	for start := 0; start < len(xs); start += sz {
		end := start + sz
		if end > len(xs) {
			end = len(xs)
		}
		wg.Add(1)
		go func(hd int, chunk []uint64) {
			defer wg.Done()
			for i := 0; i < len(chunk); i += 2 {
				if chunk[i] >= k {
					// hd is the chunk's offset in xs; report a global pair index.
					retChan <- int16((i + hd) / 2)
					return
				}
			}
		}(start, xs[start:end])
	}
	wg.Wait()
	close(retChan)

	best, found := int16(0), false
	for idx := range retChan {
		if !found || idx < best {
			best, found = idx, true
		}
	}
	if !found {
		return int16(len(xs) / 2)
	}
	return best
}

// Binary finds the key with a binary search over the pair indices; it expects
// the keys to be in ascending order, as sort.Search requires.
func Binary(keys []uint64, key uint64) int16 {
	return int16(sort.Search(len(keys), func(i int) bool {
		// Indices past the last pair count as "found" so the search
		// converges on the number of pairs when no key matches.
		if i*2 >= len(keys) {
			return true
		}
		return keys[i*2] >= key
	}))
}

// cmp2_native returns the index of the first element of twos that equals the
// element of pk at the same position, or 2 if neither does.
// NOTE(review): this compares with == while cmp4_native and cmp8_native use
// >= — confirm that the difference is intended.
func cmp2_native(twos, pk [2]uint64) int16 {
	if twos[0] == pk[0] {
		return 0
	}
	if twos[1] == pk[1] {
		return 1
	}
	return 2
}

// cmp4_native returns the index of the first element of fours that is >= the
// element of pk at the same position, or 4 if there is none.
func cmp4_native(fours, pk [4]uint64) int16 {
	for i := range fours {
		if fours[i] >= pk[i] {
			return int16(i)
		}
	}
	return 4
}

// cmp8_native returns the index of the first element of a that is >= pk[0],
// or 8 if there is none. Only the first element of pk is consulted.
func cmp8_native(a [8]uint64, pk [4]uint64) int16 {
	for i := range a {
		if a[i] >= pk[0] {
			return int16(i)
		}
	}
	return 8
}

// Search uses the Clever search to find the correct key.
+func Search(xs []uint64, k uint64) int16 { + if len(xs) < 8 || (len(xs) % 8 != 0) { + return Naive(xs, k) + } + var twos, pk [4]uint64 + pk[0] = k + pk[1] = k + pk[2] = k + pk[3] = k + for i := 0; i < len(xs); i += 8 { + twos[0] = xs[i] + twos[1] = xs[i+2] + twos[2] = xs[i+4] + twos[3] = xs[i+6] + if twos[0] >= pk[0] { + return int16(i / 2) + } + if twos[1] >= pk[1] { + return int16((i + 2) / 2) + } + if twos[2] >= pk[2] { + return int16((i + 4) / 2) + } + if twos[3] >= pk[3] { + return int16((i + 6) / 2) + } + + } + return int16(len(xs) / 2) +} diff --git a/vendor/github.com/dgraph-io/ristretto/z/simd/search_amd64.s b/vendor/github.com/dgraph-io/ristretto/z/simd/search_amd64.s new file mode 100644 index 0000000000..150c846647 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/simd/search_amd64.s @@ -0,0 +1,60 @@ +// Code generated by command: go run asm2.go -out search_amd64.s -stubs stub_search_amd64.go. DO NOT EDIT. + +#include "textflag.h" + +// func Search(xs []uint64, k uint64) int16 +TEXT ·Search(SB), NOSPLIT, $0-34 + MOVQ xs_base+0(FP), AX + MOVQ xs_len+8(FP), CX + MOVQ k+24(FP), DX + + // Save n + MOVQ CX, BX + + // Initialize idx register to zero. 
+ XORL BP, BP + +loop: + // Unroll1 + CMPQ (AX)(BP*8), DX + JAE Found + + // Unroll2 + CMPQ 16(AX)(BP*8), DX + JAE Found2 + + // Unroll3 + CMPQ 32(AX)(BP*8), DX + JAE Found3 + + // Unroll4 + CMPQ 48(AX)(BP*8), DX + JAE Found4 + + // plus8 + ADDQ $0x08, BP + CMPQ BP, CX + JB loop + JMP NotFound + +Found2: + ADDL $0x02, BP + JMP Found + +Found3: + ADDL $0x04, BP + JMP Found + +Found4: + ADDL $0x06, BP + +Found: + MOVL BP, BX + +NotFound: + MOVL BX, BP + SHRL $0x1f, BP + ADDL BX, BP + SHRL $0x01, BP + MOVL BP, ret+32(FP) + RET diff --git a/vendor/github.com/dgraph-io/ristretto/z/simd/stub_search_amd64.go b/vendor/github.com/dgraph-io/ristretto/z/simd/stub_search_amd64.go new file mode 100644 index 0000000000..0821d38a77 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/simd/stub_search_amd64.go @@ -0,0 +1,6 @@ +// Code generated by command: go run asm2.go -out search_amd64.s -stubs stub_search_amd64.go. DO NOT EDIT. + +package simd + +// Search finds the first idx for which xs[idx] >= k in xs. +func Search(xs []uint64, k uint64) int16 diff --git a/vendor/github.com/dgraph-io/ristretto/z/z.go b/vendor/github.com/dgraph-io/ristretto/z/z.go new file mode 100644 index 0000000000..97455586a1 --- /dev/null +++ b/vendor/github.com/dgraph-io/ristretto/z/z.go @@ -0,0 +1,151 @@ +/* + * Copyright 2019 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package z + +import ( + "context" + "sync" + + "github.com/cespare/xxhash/v2" +) + +// TODO: Figure out a way to re-use memhash for the second uint64 hash, we +// already know that appending bytes isn't reliable for generating a +// second hash (see Ristretto PR #88). +// +// We also know that while the Go runtime has a runtime memhash128 +// function, it's not possible to use it to generate [2]uint64 or +// anything resembling a 128bit hash, even though that's exactly what +// we need in this situation. +func KeyToHash(key interface{}) (uint64, uint64) { + if key == nil { + return 0, 0 + } + switch k := key.(type) { + case uint64: + return k, 0 + case string: + return MemHashString(k), xxhash.Sum64String(k) + case []byte: + return MemHash(k), xxhash.Sum64(k) + case byte: + return uint64(k), 0 + case int: + return uint64(k), 0 + case int32: + return uint64(k), 0 + case uint32: + return uint64(k), 0 + case int64: + return uint64(k), 0 + default: + panic("Key type not supported") + } +} + +var ( + dummyCloserChan <-chan struct{} + tmpDir string +) + +// Closer holds the two things we need to close a goroutine and wait for it to +// finish: a chan to tell the goroutine to shut down, and a WaitGroup with +// which to wait for it to finish shutting down. +type Closer struct { + waiting sync.WaitGroup + + ctx context.Context + cancel context.CancelFunc +} + +// SetTmpDir sets the temporary directory for the temporary buffers. +func SetTmpDir(dir string) { + tmpDir = dir +} + +// NewCloser constructs a new Closer, with an initial count on the WaitGroup. +func NewCloser(initial int) *Closer { + ret := &Closer{} + ret.ctx, ret.cancel = context.WithCancel(context.Background()) + ret.waiting.Add(initial) + return ret +} + +// AddRunning Add()'s delta to the WaitGroup. +func (lc *Closer) AddRunning(delta int) { + lc.waiting.Add(delta) +} + +// Ctx can be used to get a context, which would automatically get cancelled when Signal is called. 
+func (lc *Closer) Ctx() context.Context { + if lc == nil { + return context.Background() + } + return lc.ctx +} + +// Signal signals the HasBeenClosed signal. +func (lc *Closer) Signal() { + // Todo(ibrahim): Change Signal to return error on next badger breaking change. + lc.cancel() +} + +// HasBeenClosed gets signaled when Signal() is called. +func (lc *Closer) HasBeenClosed() <-chan struct{} { + if lc == nil { + return dummyCloserChan + } + return lc.ctx.Done() +} + +// Done calls Done() on the WaitGroup. +func (lc *Closer) Done() { + if lc == nil { + return + } + lc.waiting.Done() +} + +// Wait waits on the WaitGroup. (It waits for NewCloser's initial value, AddRunning, and Done +// calls to balance out.) +func (lc *Closer) Wait() { + lc.waiting.Wait() +} + +// SignalAndWait calls Signal(), then Wait(). +func (lc *Closer) SignalAndWait() { + lc.Signal() + lc.Wait() +} + +// ZeroOut zeroes out all the bytes in the range [start, end). +func ZeroOut(dst []byte, start, end int) { + if start < 0 || start >= len(dst) { + return // BAD + } + if end >= len(dst) { + end = len(dst) + } + if end-start <= 0 { + return + } + Memclr(dst[start:end]) + // b := dst[start:end] + // for i := range b { + // b[i] = 0x0 + // } +} diff --git a/vendor/github.com/golang/glog/LICENSE b/vendor/github.com/golang/glog/LICENSE new file mode 100644 index 0000000000..37ec93a14f --- /dev/null +++ b/vendor/github.com/golang/glog/LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. 
+ +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. 
+ +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. 
+ +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file 
distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
+ +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/golang/glog/README.md b/vendor/github.com/golang/glog/README.md new file mode 100644 index 0000000000..a4f73883b2 --- /dev/null +++ b/vendor/github.com/golang/glog/README.md @@ -0,0 +1,36 @@ +# glog + +[![PkgGoDev](https://pkg.go.dev/badge/github.com/golang/glog)](https://pkg.go.dev/github.com/golang/glog) + +Leveled execution logs for Go. + +This is an efficient pure Go implementation of leveled logs in the +manner of the open source C++ package [_glog_](https://github.com/google/glog). + +By binding methods to booleans it is possible to use the log package without paying the expense of evaluating the arguments to the log. Through the `-vmodule` flag, the package also provides fine-grained +control over logging at the file level. 
+ +The comment from `glog.go` introduces the ideas: + +Package _glog_ implements logging analogous to the Google-internal C++ INFO/ERROR/V setup. It provides the functions Info, Warning, Error, Fatal, plus formatting variants such as Infof. It also provides V-style logging controlled by the `-v` and `-vmodule=file=2` flags. + +Basic examples: + +```go +glog.Info("Prepare to repel boarders") + +glog.Fatalf("Initialization failed: %s", err) +``` + +See the documentation for the V function for an explanation of these examples: + +```go +if glog.V(2) { + glog.Info("Starting transaction...") +} +glog.V(2).Infoln("Processed", nItems, "elements") +``` + +The repository contains an open source version of the log package used inside Google. The master copy of the source lives inside Google, not here. The code in this repo is for export only and is not itself under development. Feature requests will be ignored. + +Send bug reports to golang-nuts@googlegroups.com. diff --git a/vendor/github.com/golang/glog/glog.go b/vendor/github.com/golang/glog/glog.go new file mode 100644 index 0000000000..718c34f886 --- /dev/null +++ b/vendor/github.com/golang/glog/glog.go @@ -0,0 +1,1180 @@ +// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/ +// +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package glog implements logging analogous to the Google-internal C++ INFO/ERROR/V setup.
+// It provides functions Info, Warning, Error, Fatal, plus formatting variants such as +// Infof. It also provides V-style logging controlled by the -v and -vmodule=file=2 flags. +// +// Basic examples: +// +// glog.Info("Prepare to repel boarders") +// +// glog.Fatalf("Initialization failed: %s", err) +// +// See the documentation for the V function for an explanation of these examples: +// +// if glog.V(2) { +// glog.Info("Starting transaction...") +// } +// +// glog.V(2).Infoln("Processed", nItems, "elements") +// +// Log output is buffered and written periodically using Flush. Programs +// should call Flush before exiting to guarantee all log output is written. +// +// By default, all log statements write to files in a temporary directory. +// This package provides several flags that modify this behavior. +// As a result, flag.Parse must be called before any logging is done. +// +// -logtostderr=false +// Logs are written to standard error instead of to files. +// -alsologtostderr=false +// Logs are written to standard error as well as to files. +// -stderrthreshold=ERROR +// Log events at or above this severity are logged to standard +// error as well as to files. +// -log_dir="" +// Log files will be written to this directory instead of the +// default temporary directory. +// +// Other flags provide aids to debugging. +// +// -log_backtrace_at="" +// When set to a file and line number holding a logging statement, +// such as +// -log_backtrace_at=gopherflakes.go:234 +// a stack trace will be written to the Info log whenever execution +// hits that statement. (Unlike with -vmodule, the ".go" must be +// present.) +// -v=0 +// Enable V-leveled logging at the specified level. +// -vmodule="" +// The syntax of the argument is a comma-separated list of pattern=N, +// where pattern is a literal file name (minus the ".go" suffix) or +// "glob" pattern and N is a V level. 
For instance, +// -vmodule=gopher*=3 +// sets the V level to 3 in all Go files whose names begin "gopher". +// +package glog + +import ( + "bufio" + "bytes" + "errors" + "flag" + "fmt" + "io" + stdLog "log" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" +) + +// severity identifies the sort of log: info, warning etc. It also implements +// the flag.Value interface. The -stderrthreshold flag is of type severity and +// should be modified only through the flag.Value interface. The values match +// the corresponding constants in C++. +type severity int32 // sync/atomic int32 + +// These constants identify the log levels in order of increasing severity. +// A message written to a high-severity log file is also written to each +// lower-severity log file. +const ( + infoLog severity = iota + warningLog + errorLog + fatalLog + numSeverity = 4 +) + +const severityChar = "IWEF" + +var severityName = []string{ + infoLog: "INFO", + warningLog: "WARNING", + errorLog: "ERROR", + fatalLog: "FATAL", +} + +// get returns the value of the severity. +func (s *severity) get() severity { + return severity(atomic.LoadInt32((*int32)(s))) +} + +// set sets the value of the severity. +func (s *severity) set(val severity) { + atomic.StoreInt32((*int32)(s), int32(val)) +} + +// String is part of the flag.Value interface. +func (s *severity) String() string { + return strconv.FormatInt(int64(*s), 10) +} + +// Get is part of the flag.Value interface. +func (s *severity) Get() interface{} { + return *s +} + +// Set is part of the flag.Value interface. +func (s *severity) Set(value string) error { + var threshold severity + // Is it a known name? 
+ if v, ok := severityByName(value); ok { + threshold = v + } else { + v, err := strconv.Atoi(value) + if err != nil { + return err + } + threshold = severity(v) + } + logging.stderrThreshold.set(threshold) + return nil +} + +func severityByName(s string) (severity, bool) { + s = strings.ToUpper(s) + for i, name := range severityName { + if name == s { + return severity(i), true + } + } + return 0, false +} + +// OutputStats tracks the number of output lines and bytes written. +type OutputStats struct { + lines int64 + bytes int64 +} + +// Lines returns the number of lines written. +func (s *OutputStats) Lines() int64 { + return atomic.LoadInt64(&s.lines) +} + +// Bytes returns the number of bytes written. +func (s *OutputStats) Bytes() int64 { + return atomic.LoadInt64(&s.bytes) +} + +// Stats tracks the number of lines of output and number of bytes +// per severity level. Values must be read with atomic.LoadInt64. +var Stats struct { + Info, Warning, Error OutputStats +} + +var severityStats = [numSeverity]*OutputStats{ + infoLog: &Stats.Info, + warningLog: &Stats.Warning, + errorLog: &Stats.Error, +} + +// Level is exported because it appears in the arguments to V and is +// the type of the v flag, which can be set programmatically. +// It's a distinct type because we want to discriminate it from logType. +// Variables of type level are only changed under logging.mu. +// The -v flag is read only with atomic ops, so the state of the logging +// module is consistent. + +// Level is treated as a sync/atomic int32. + +// Level specifies a level of verbosity for V logs. *Level implements +// flag.Value; the -v flag is of type Level and should be modified +// only through the flag.Value interface. +type Level int32 + +// get returns the value of the Level. +func (l *Level) get() Level { + return Level(atomic.LoadInt32((*int32)(l))) +} + +// set sets the value of the Level. 
+func (l *Level) set(val Level) { + atomic.StoreInt32((*int32)(l), int32(val)) +} + +// String is part of the flag.Value interface. +func (l *Level) String() string { + return strconv.FormatInt(int64(*l), 10) +} + +// Get is part of the flag.Value interface. +func (l *Level) Get() interface{} { + return *l +} + +// Set is part of the flag.Value interface. +func (l *Level) Set(value string) error { + v, err := strconv.Atoi(value) + if err != nil { + return err + } + logging.mu.Lock() + defer logging.mu.Unlock() + logging.setVState(Level(v), logging.vmodule.filter, false) + return nil +} + +// moduleSpec represents the setting of the -vmodule flag. +type moduleSpec struct { + filter []modulePat +} + +// modulePat contains a filter for the -vmodule flag. +// It holds a verbosity level and a file pattern to match. +type modulePat struct { + pattern string + literal bool // The pattern is a literal string + level Level +} + +// match reports whether the file matches the pattern. It uses a string +// comparison if the pattern contains no metacharacters. +func (m *modulePat) match(file string) bool { + if m.literal { + return file == m.pattern + } + match, _ := filepath.Match(m.pattern, file) + return match +} + +func (m *moduleSpec) String() string { + // Lock because the type is not atomic. TODO: clean this up. + logging.mu.Lock() + defer logging.mu.Unlock() + var b bytes.Buffer + for i, f := range m.filter { + if i > 0 { + b.WriteRune(',') + } + fmt.Fprintf(&b, "%s=%d", f.pattern, f.level) + } + return b.String() +} + +// Get is part of the (Go 1.2) flag.Getter interface. It always returns nil for this flag type since the +// struct is not exported. 
+func (m *moduleSpec) Get() interface{} { + return nil +} + +var errVmoduleSyntax = errors.New("syntax error: expect comma-separated list of filename=N") + +// Syntax: -vmodule=recordio=2,file=1,gfs*=3 +func (m *moduleSpec) Set(value string) error { + var filter []modulePat + for _, pat := range strings.Split(value, ",") { + if len(pat) == 0 { + // Empty strings such as from a trailing comma can be ignored. + continue + } + patLev := strings.Split(pat, "=") + if len(patLev) != 2 || len(patLev[0]) == 0 || len(patLev[1]) == 0 { + return errVmoduleSyntax + } + pattern := patLev[0] + v, err := strconv.Atoi(patLev[1]) + if err != nil { + return errors.New("syntax error: expect comma-separated list of filename=N") + } + if v < 0 { + return errors.New("negative value for vmodule level") + } + if v == 0 { + continue // Ignore. It's harmless but no point in paying the overhead. + } + // TODO: check syntax of filter? + filter = append(filter, modulePat{pattern, isLiteral(pattern), Level(v)}) + } + logging.mu.Lock() + defer logging.mu.Unlock() + logging.setVState(logging.verbosity, filter, true) + return nil +} + +// isLiteral reports whether the pattern is a literal string, that is, has no metacharacters +// that require filepath.Match to be called to match the pattern. +func isLiteral(pattern string) bool { + return !strings.ContainsAny(pattern, `\*?[]`) +} + +// traceLocation represents the setting of the -log_backtrace_at flag. +type traceLocation struct { + file string + line int +} + +// isSet reports whether the trace location has been specified. +// logging.mu is held. +func (t *traceLocation) isSet() bool { + return t.line > 0 +} + +// match reports whether the specified file and line matches the trace location. +// The argument file name is the full path, not the basename specified in the flag. +// logging.mu is held. 
+func (t *traceLocation) match(file string, line int) bool { + if t.line != line { + return false + } + if i := strings.LastIndex(file, "/"); i >= 0 { + file = file[i+1:] + } + return t.file == file +} + +func (t *traceLocation) String() string { + // Lock because the type is not atomic. TODO: clean this up. + logging.mu.Lock() + defer logging.mu.Unlock() + return fmt.Sprintf("%s:%d", t.file, t.line) +} + +// Get is part of the (Go 1.2) flag.Getter interface. It always returns nil for this flag type since the +// struct is not exported +func (t *traceLocation) Get() interface{} { + return nil +} + +var errTraceSyntax = errors.New("syntax error: expect file.go:234") + +// Syntax: -log_backtrace_at=gopherflakes.go:234 +// Note that unlike vmodule the file extension is included here. +func (t *traceLocation) Set(value string) error { + if value == "" { + // Unset. + t.line = 0 + t.file = "" + } + fields := strings.Split(value, ":") + if len(fields) != 2 { + return errTraceSyntax + } + file, line := fields[0], fields[1] + if !strings.Contains(file, ".") { + return errTraceSyntax + } + v, err := strconv.Atoi(line) + if err != nil { + return errTraceSyntax + } + if v <= 0 { + return errors.New("negative or zero value for level") + } + logging.mu.Lock() + defer logging.mu.Unlock() + t.line = v + t.file = file + return nil +} + +// flushSyncWriter is the interface satisfied by logging destinations. 
+type flushSyncWriter interface { + Flush() error + Sync() error + io.Writer +} + +func init() { + flag.BoolVar(&logging.toStderr, "logtostderr", false, "log to standard error instead of files") + flag.BoolVar(&logging.alsoToStderr, "alsologtostderr", false, "log to standard error as well as files") + flag.Var(&logging.verbosity, "v", "log level for V logs") + flag.Var(&logging.stderrThreshold, "stderrthreshold", "logs at or above this threshold go to stderr") + flag.Var(&logging.vmodule, "vmodule", "comma-separated list of pattern=N settings for file-filtered logging") + flag.Var(&logging.traceLocation, "log_backtrace_at", "when logging hits line file:N, emit a stack trace") + + // Default stderrThreshold is ERROR. + logging.stderrThreshold = errorLog + + logging.setVState(0, nil, false) + go logging.flushDaemon() +} + +// Flush flushes all pending log I/O. +func Flush() { + logging.lockAndFlushAll() +} + +// loggingT collects all the global state of the logging setup. +type loggingT struct { + // Boolean flags. Not handled atomically because the flag.Value interface + // does not let us avoid the =true, and that shorthand is necessary for + // compatibility. TODO: does this matter enough to fix? Seems unlikely. + toStderr bool // The -logtostderr flag. + alsoToStderr bool // The -alsologtostderr flag. + + // Level flag. Handled atomically. + stderrThreshold severity // The -stderrthreshold flag. + + // freeList is a list of byte buffers, maintained under freeListMu. + freeList *buffer + // freeListMu maintains the free list. It is separate from the main mutex + // so buffers can be grabbed and printed to without holding the main lock, + // for better parallelization. + freeListMu sync.Mutex + + // mu protects the remaining elements of this structure and is + // used to synchronize logging. + mu sync.Mutex + // file holds writer for each of the log types. 
+ file [numSeverity]flushSyncWriter + // pcs is used in V to avoid an allocation when computing the caller's PC. + pcs [1]uintptr + // vmap is a cache of the V Level for each V() call site, identified by PC. + // It is wiped whenever the vmodule flag changes state. + vmap map[uintptr]Level + // filterLength stores the length of the vmodule filter chain. If greater + // than zero, it means vmodule is enabled. It may be read safely + // using sync.LoadInt32, but is only modified under mu. + filterLength int32 + // traceLocation is the state of the -log_backtrace_at flag. + traceLocation traceLocation + // These flags are modified only under lock, although verbosity may be fetched + // safely using atomic.LoadInt32. + vmodule moduleSpec // The state of the -vmodule flag. + verbosity Level // V logging level, the value of the -v flag/ +} + +// buffer holds a byte Buffer for reuse. The zero value is ready for use. +type buffer struct { + bytes.Buffer + tmp [64]byte // temporary byte array for creating headers. + next *buffer +} + +var logging loggingT + +// setVState sets a consistent state for V logging. +// l.mu is held. +func (l *loggingT) setVState(verbosity Level, filter []modulePat, setFilter bool) { + // Turn verbosity off so V will not fire while we are in transition. + logging.verbosity.set(0) + // Ditto for filter length. + atomic.StoreInt32(&logging.filterLength, 0) + + // Set the new filters and wipe the pc->Level map if the filter has changed. + if setFilter { + logging.vmodule.filter = filter + logging.vmap = make(map[uintptr]Level) + } + + // Things are consistent now, so enable filtering and verbosity. + // They are enabled in order opposite to that in V. + atomic.StoreInt32(&logging.filterLength, int32(len(filter))) + logging.verbosity.set(verbosity) +} + +// getBuffer returns a new, ready-to-use buffer. 
func (l *loggingT) getBuffer() *buffer {
	// Pop the head of the free list while holding only freeListMu.
	l.freeListMu.Lock()
	b := l.freeList
	if b != nil {
		l.freeList = b.next
	}
	l.freeListMu.Unlock()
	if b == nil {
		// Free list was empty: allocate a fresh buffer.
		b = new(buffer)
	} else {
		// Recycled buffer: unlink it and drop any previous contents.
		b.next = nil
		b.Reset()
	}
	return b
}

// putBuffer returns a buffer to the free list.
// Buffers currently holding 256 bytes or more are not recycled.
func (l *loggingT) putBuffer(b *buffer) {
	if b.Len() >= 256 {
		// Let big buffers die a natural death.
		return
	}
	l.freeListMu.Lock()
	b.next = l.freeList
	l.freeList = b
	l.freeListMu.Unlock()
}

var timeNow = time.Now // Stubbed out for testing.

/*
header formats a log header as defined by the C++ implementation.
It returns a buffer containing the formatted header and the user's file and line number.
The depth specifies how many stack frames above lives the source line to be identified in the log message.

Log lines have this form:

	Lmmdd hh:mm:ss.uuuuuu threadid file:line] msg...

where the fields are defined as follows:

	L                A single character, representing the log level (eg 'I' for INFO)
	mm               The month (zero padded; ie May is '05')
	dd               The day (zero padded)
	hh:mm:ss.uuuuuu  Time in hours, minutes and fractional seconds
	threadid         The space-padded thread ID as returned by GetTID()
	file             The file name
	line             The line number
	msg              The user-supplied message
*/
func (l *loggingT) header(s severity, depth int) (*buffer, string, int) {
	_, file, line, ok := runtime.Caller(3 + depth)
	if !ok {
		// Caller information unavailable: use placeholders.
		file = "???"
		line = 1
	} else {
		// Keep only the part of the path after the last slash.
		slash := strings.LastIndex(file, "/")
		if slash >= 0 {
			file = file[slash+1:]
		}
	}
	return l.formatHeader(s, file, line), file, line
}

// formatHeader formats a log header using the provided file name and line number.
// The header is assembled in buf.tmp at fixed byte offsets and then appended
// to the returned buffer, which comes from getBuffer.
func (l *loggingT) formatHeader(s severity, file string, line int) *buffer {
	now := timeNow()
	if line < 0 {
		line = 0 // not a real line number, but acceptable to someDigits
	}
	if s > fatalLog {
		s = infoLog // for safety.
	}
	buf := l.getBuffer()

	// Avoid Fprintf, for speed. The format is so simple that we can do it quickly by hand.
	// It's worth about 3X. Fprintf is hard.
	_, month, day := now.Date()
	hour, minute, second := now.Clock()
	// Lmmdd hh:mm:ss.uuuuuu threadid file:line]
	buf.tmp[0] = severityChar[s]
	buf.twoDigits(1, int(month))
	buf.twoDigits(3, day)
	buf.tmp[5] = ' '
	buf.twoDigits(6, hour)
	buf.tmp[8] = ':'
	buf.twoDigits(9, minute)
	buf.tmp[11] = ':'
	buf.twoDigits(12, second)
	buf.tmp[14] = '.'
	buf.nDigits(6, 15, now.Nanosecond()/1000, '0')
	buf.tmp[21] = ' '
	buf.nDigits(7, 22, pid, ' ') // TODO: should be TID
	buf.tmp[29] = ' '
	buf.Write(buf.tmp[:30])
	buf.WriteString(file)
	// tmp is reused from offset 0 for the ":line] " suffix.
	buf.tmp[0] = ':'
	n := buf.someDigits(1, line)
	buf.tmp[n+1] = ']'
	buf.tmp[n+2] = ' '
	buf.Write(buf.tmp[:n+3])
	return buf
}

// Some custom tiny helper functions to print the log header efficiently.

const digits = "0123456789"

// twoDigits formats a zero-prefixed two-digit integer at buf.tmp[i].
func (buf *buffer) twoDigits(i, d int) {
	buf.tmp[i+1] = digits[d%10]
	d /= 10
	buf.tmp[i] = digits[d%10]
}

// nDigits formats an n-digit integer at buf.tmp[i],
// padding with pad on the left.
// It assumes d >= 0.
func (buf *buffer) nDigits(n, i, d int, pad byte) {
	j := n - 1
	// Emit digits right to left until the value or the field is exhausted.
	for ; j >= 0 && d > 0; j-- {
		buf.tmp[i+j] = digits[d%10]
		d /= 10
	}
	// Fill whatever remains on the left with the pad byte.
	for ; j >= 0; j-- {
		buf.tmp[i+j] = pad
	}
}

// someDigits formats a zero-prefixed variable-width integer at buf.tmp[i].
// It returns the number of bytes written.
func (buf *buffer) someDigits(i, d int) int {
	// Print into the top, then copy down. We know there's space for at least
	// a 10-digit number.
	j := len(buf.tmp)
	for {
		j--
		buf.tmp[j] = digits[d%10]
		d /= 10
		if d == 0 {
			break
		}
	}
	return copy(buf.tmp[i:], buf.tmp[j:])
}

// println formats args with fmt.Fprintln after the header and hands the
// result to output.
func (l *loggingT) println(s severity, args ...interface{}) {
	buf, file, line := l.header(s, 0)
	fmt.Fprintln(buf, args...)
	l.output(s, buf, file, line, false)
}

// print is printDepth with a depth of 1.
func (l *loggingT) print(s severity, args ...interface{}) {
	l.printDepth(s, 1, args...)
}

// printDepth formats args with fmt.Fprint after the header, appends a
// newline if the text does not already end in one, and hands the result to
// output. depth is forwarded to header.
func (l *loggingT) printDepth(s severity, depth int, args ...interface{}) {
	buf, file, line := l.header(s, depth)
	fmt.Fprint(buf, args...)
	if buf.Bytes()[buf.Len()-1] != '\n' {
		buf.WriteByte('\n')
	}
	l.output(s, buf, file, line, false)
}

// printf formats args with fmt.Fprintf after the header, appends a newline
// if the text does not already end in one, and hands the result to output.
func (l *loggingT) printf(s severity, format string, args ...interface{}) {
	buf, file, line := l.header(s, 0)
	fmt.Fprintf(buf, format, args...)
	if buf.Bytes()[buf.Len()-1] != '\n' {
		buf.WriteByte('\n')
	}
	l.output(s, buf, file, line, false)
}

// printWithFileLine behaves like print but uses the provided file and line number. If
// alsoToStderr is true, the log message always appears on standard error; it
// will also appear in the log file unless --logtostderr is set.
func (l *loggingT) printWithFileLine(s severity, file string, line int, alsoToStderr bool, args ...interface{}) {
	buf := l.formatHeader(s, file, line)
	fmt.Fprint(buf, args...)
	if buf.Bytes()[buf.Len()-1] != '\n' {
		buf.WriteByte('\n')
	}
	l.output(s, buf, file, line, alsoToStderr)
}

// output writes the data to the log files and releases the buffer.
// It takes l.mu itself and releases it on every path before returning or
// exiting. For fatalLog it never returns: it flushes with a timeout and
// calls os.Exit.
func (l *loggingT) output(s severity, buf *buffer, file string, line int, alsoToStderr bool) {
	l.mu.Lock()
	// Append a stack trace when this call site is the -log_backtrace_at location.
	if l.traceLocation.isSet() {
		if l.traceLocation.match(file, line) {
			buf.Write(stacks(false))
		}
	}
	data := buf.Bytes()
	if !flag.Parsed() {
		os.Stderr.Write([]byte("ERROR: logging before flag.Parse: "))
		os.Stderr.Write(data)
	} else if l.toStderr {
		os.Stderr.Write(data)
	} else {
		if alsoToStderr || l.alsoToStderr || s >= l.stderrThreshold.get() {
			os.Stderr.Write(data)
		}
		if l.file[s] == nil {
			if err := l.createFiles(s); err != nil {
				os.Stderr.Write(data) // Make sure the message appears somewhere.
				l.exit(err)
			}
		}
		// The fallthrough chain writes the record to this severity's file and
		// to every lower-severity file.
		switch s {
		case fatalLog:
			l.file[fatalLog].Write(data)
			fallthrough
		case errorLog:
			l.file[errorLog].Write(data)
			fallthrough
		case warningLog:
			l.file[warningLog].Write(data)
			fallthrough
		case infoLog:
			l.file[infoLog].Write(data)
		}
	}
	if s == fatalLog {
		// If we got here via Exit rather than Fatal, print no stacks.
		if atomic.LoadUint32(&fatalNoStacks) > 0 {
			l.mu.Unlock()
			timeoutFlush(10 * time.Second)
			os.Exit(1)
		}
		// Dump all goroutine stacks before exiting.
		// First, make sure we see the trace for the current goroutine on standard error.
		// If -logtostderr has been specified, the loop below will do that anyway
		// as the first stack in the full dump.
		if !l.toStderr {
			os.Stderr.Write(stacks(false))
		}
		// Write the stack trace for all goroutines to the files.
		trace := stacks(true)
		logExitFunc = func(error) {} // If we get a write error, we'll still exit below.
		for log := fatalLog; log >= infoLog; log-- {
			if f := l.file[log]; f != nil { // Can be nil if -logtostderr is set.
				f.Write(trace)
			}
		}
		l.mu.Unlock()
		timeoutFlush(10 * time.Second)
		os.Exit(255) // C++ uses -1, which is silly because it's anded with 255 anyway.
	}
	l.putBuffer(buf)
	l.mu.Unlock()
	// Update the per-severity counters; severities without a stats entry are skipped.
	if stats := severityStats[s]; stats != nil {
		atomic.AddInt64(&stats.lines, 1)
		atomic.AddInt64(&stats.bytes, int64(len(data)))
	}
}

// timeoutFlush calls Flush and returns when it completes or after timeout
// elapses, whichever happens first. This is needed because the hooks invoked
// by Flush may deadlock when glog.Fatal is called from a hook that holds
// a lock.
+func timeoutFlush(timeout time.Duration) { + done := make(chan bool, 1) + go func() { + Flush() // calls logging.lockAndFlushAll() + done <- true + }() + select { + case <-done: + case <-time.After(timeout): + fmt.Fprintln(os.Stderr, "glog: Flush took longer than", timeout) + } +} + +// stacks is a wrapper for runtime.Stack that attempts to recover the data for all goroutines. +func stacks(all bool) []byte { + // We don't know how big the traces are, so grow a few times if they don't fit. Start large, though. + n := 10000 + if all { + n = 100000 + } + var trace []byte + for i := 0; i < 5; i++ { + trace = make([]byte, n) + nbytes := runtime.Stack(trace, all) + if nbytes < len(trace) { + return trace[:nbytes] + } + n *= 2 + } + return trace +} + +// logExitFunc provides a simple mechanism to override the default behavior +// of exiting on error. Used in testing and to guarantee we reach a required exit +// for fatal logs. Instead, exit could be a function rather than a method but that +// would make its use clumsier. +var logExitFunc func(error) + +// exit is called if there is trouble creating or writing log files. +// It flushes the logs and exits the program; there's no point in hanging around. +// l.mu is held. +func (l *loggingT) exit(err error) { + fmt.Fprintf(os.Stderr, "log: exiting because of error: %s\n", err) + // If logExitFunc is set, we do that instead of exiting. + if logExitFunc != nil { + logExitFunc(err) + return + } + l.flushAll() + os.Exit(2) +} + +// syncBuffer joins a bufio.Writer to its underlying file, providing access to the +// file's Sync method and providing a wrapper for the Write method that provides log +// file rotation. There are conflicting methods, so the file cannot be embedded. +// l.mu is held for all its methods. 
+type syncBuffer struct { + logger *loggingT + *bufio.Writer + file *os.File + sev severity + nbytes uint64 // The number of bytes written to this file +} + +func (sb *syncBuffer) Sync() error { + return sb.file.Sync() +} + +func (sb *syncBuffer) Write(p []byte) (n int, err error) { + if sb.nbytes+uint64(len(p)) >= MaxSize { + if err := sb.rotateFile(time.Now()); err != nil { + sb.logger.exit(err) + } + } + n, err = sb.Writer.Write(p) + sb.nbytes += uint64(n) + if err != nil { + sb.logger.exit(err) + } + return +} + +// rotateFile closes the syncBuffer's file and starts a new one. +func (sb *syncBuffer) rotateFile(now time.Time) error { + if sb.file != nil { + sb.Flush() + sb.file.Close() + } + var err error + sb.file, _, err = create(severityName[sb.sev], now) + sb.nbytes = 0 + if err != nil { + return err + } + + sb.Writer = bufio.NewWriterSize(sb.file, bufferSize) + + // Write header. + var buf bytes.Buffer + fmt.Fprintf(&buf, "Log file created at: %s\n", now.Format("2006/01/02 15:04:05")) + fmt.Fprintf(&buf, "Running on machine: %s\n", host) + fmt.Fprintf(&buf, "Binary: Built with %s %s for %s/%s\n", runtime.Compiler, runtime.Version(), runtime.GOOS, runtime.GOARCH) + fmt.Fprintf(&buf, "Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg\n") + n, err := sb.file.Write(buf.Bytes()) + sb.nbytes += uint64(n) + return err +} + +// bufferSize sizes the buffer associated with each log file. It's large +// so that log records can accumulate without the logging thread blocking +// on disk I/O. The flushDaemon will block instead. +const bufferSize = 256 * 1024 + +// createFiles creates all the log files for severity from sev down to infoLog. +// l.mu is held. +func (l *loggingT) createFiles(sev severity) error { + now := time.Now() + // Files are created in decreasing severity order, so as soon as we find one + // has already been created, we can stop. 
+ for s := sev; s >= infoLog && l.file[s] == nil; s-- { + sb := &syncBuffer{ + logger: l, + sev: s, + } + if err := sb.rotateFile(now); err != nil { + return err + } + l.file[s] = sb + } + return nil +} + +const flushInterval = 30 * time.Second + +// flushDaemon periodically flushes the log file buffers. +func (l *loggingT) flushDaemon() { + for range time.NewTicker(flushInterval).C { + l.lockAndFlushAll() + } +} + +// lockAndFlushAll is like flushAll but locks l.mu first. +func (l *loggingT) lockAndFlushAll() { + l.mu.Lock() + l.flushAll() + l.mu.Unlock() +} + +// flushAll flushes all the logs and attempts to "sync" their data to disk. +// l.mu is held. +func (l *loggingT) flushAll() { + // Flush from fatal down, in case there's trouble flushing. + for s := fatalLog; s >= infoLog; s-- { + file := l.file[s] + if file != nil { + file.Flush() // ignore error + file.Sync() // ignore error + } + } +} + +// CopyStandardLogTo arranges for messages written to the Go "log" package's +// default logs to also appear in the Google logs for the named and lower +// severities. Subsequent changes to the standard log's default output location +// or format may break this behavior. +// +// Valid names are "INFO", "WARNING", "ERROR", and "FATAL". If the name is not +// recognized, CopyStandardLogTo panics. +func CopyStandardLogTo(name string) { + sev, ok := severityByName(name) + if !ok { + panic(fmt.Sprintf("log.CopyStandardLogTo(%q): unrecognized severity name", name)) + } + // Set a log format that captures the user's file and line: + // d.go:23: message + stdLog.SetFlags(stdLog.Lshortfile) + stdLog.SetOutput(logBridge(sev)) +} + +// logBridge provides the Write method that enables CopyStandardLogTo to connect +// Go's standard logs to the logs provided by this package. +type logBridge severity + +// Write parses the standard logging line and passes its components to the +// logger for severity(lb). +func (lb logBridge) Write(b []byte) (n int, err error) { + var ( + file = "???" 
+ line = 1 + text string + ) + // Split "d.go:23: message" into "d.go", "23", and "message". + if parts := bytes.SplitN(b, []byte{':'}, 3); len(parts) != 3 || len(parts[0]) < 1 || len(parts[2]) < 1 { + text = fmt.Sprintf("bad log format: %s", b) + } else { + file = string(parts[0]) + text = string(parts[2][1:]) // skip leading space + line, err = strconv.Atoi(string(parts[1])) + if err != nil { + text = fmt.Sprintf("bad line number: %s", b) + line = 1 + } + } + // printWithFileLine with alsoToStderr=true, so standard log messages + // always appear on standard error. + logging.printWithFileLine(severity(lb), file, line, true, text) + return len(b), nil +} + +// setV computes and remembers the V level for a given PC +// when vmodule is enabled. +// File pattern matching takes the basename of the file, stripped +// of its .go suffix, and uses filepath.Match, which is a little more +// general than the *? matching used in C++. +// l.mu is held. +func (l *loggingT) setV(pc uintptr) Level { + fn := runtime.FuncForPC(pc) + file, _ := fn.FileLine(pc) + // The file is something like /a/b/c/d.go. We want just the d. + if strings.HasSuffix(file, ".go") { + file = file[:len(file)-3] + } + if slash := strings.LastIndex(file, "/"); slash >= 0 { + file = file[slash+1:] + } + for _, filter := range l.vmodule.filter { + if filter.match(file) { + l.vmap[pc] = filter.level + return filter.level + } + } + l.vmap[pc] = 0 + return 0 +} + +// Verbose is a boolean type that implements Infof (like Printf) etc. +// See the documentation of V for more information. +type Verbose bool + +// V reports whether verbosity at the call site is at least the requested level. +// The returned value is a boolean of type Verbose, which implements Info, Infoln +// and Infof. These methods will write to the Info log if called. 
+// Thus, one may write either +// if glog.V(2) { glog.Info("log this") } +// or +// glog.V(2).Info("log this") +// The second form is shorter but the first is cheaper if logging is off because it does +// not evaluate its arguments. +// +// Whether an individual call to V generates a log record depends on the setting of +// the -v and --vmodule flags; both are off by default. If the level in the call to +// V is at most the value of -v, or of -vmodule for the source file containing the +// call, the V call will log. +func V(level Level) Verbose { + // This function tries hard to be cheap unless there's work to do. + // The fast path is two atomic loads and compares. + + // Here is a cheap but safe test to see if V logging is enabled globally. + if logging.verbosity.get() >= level { + return Verbose(true) + } + + // It's off globally but it vmodule may still be set. + // Here is another cheap but safe test to see if vmodule is enabled. + if atomic.LoadInt32(&logging.filterLength) > 0 { + // Now we need a proper lock to use the logging structure. The pcs field + // is shared so we must lock before accessing it. This is fairly expensive, + // but if V logging is enabled we're slow anyway. + logging.mu.Lock() + defer logging.mu.Unlock() + if runtime.Callers(2, logging.pcs[:]) == 0 { + return Verbose(false) + } + v, ok := logging.vmap[logging.pcs[0]] + if !ok { + v = logging.setV(logging.pcs[0]) + } + return Verbose(v >= level) + } + return Verbose(false) +} + +// Info is equivalent to the global Info function, guarded by the value of v. +// See the documentation of V for usage. +func (v Verbose) Info(args ...interface{}) { + if v { + logging.print(infoLog, args...) + } +} + +// Infoln is equivalent to the global Infoln function, guarded by the value of v. +// See the documentation of V for usage. +func (v Verbose) Infoln(args ...interface{}) { + if v { + logging.println(infoLog, args...) 
+ } +} + +// Infof is equivalent to the global Infof function, guarded by the value of v. +// See the documentation of V for usage. +func (v Verbose) Infof(format string, args ...interface{}) { + if v { + logging.printf(infoLog, format, args...) + } +} + +// Info logs to the INFO log. +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Info(args ...interface{}) { + logging.print(infoLog, args...) +} + +// InfoDepth acts as Info but uses depth to determine which call frame to log. +// InfoDepth(0, "msg") is the same as Info("msg"). +func InfoDepth(depth int, args ...interface{}) { + logging.printDepth(infoLog, depth, args...) +} + +// Infoln logs to the INFO log. +// Arguments are handled in the manner of fmt.Println; a newline is appended if missing. +func Infoln(args ...interface{}) { + logging.println(infoLog, args...) +} + +// Infof logs to the INFO log. +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. +func Infof(format string, args ...interface{}) { + logging.printf(infoLog, format, args...) +} + +// Warning logs to the WARNING and INFO logs. +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Warning(args ...interface{}) { + logging.print(warningLog, args...) +} + +// WarningDepth acts as Warning but uses depth to determine which call frame to log. +// WarningDepth(0, "msg") is the same as Warning("msg"). +func WarningDepth(depth int, args ...interface{}) { + logging.printDepth(warningLog, depth, args...) +} + +// Warningln logs to the WARNING and INFO logs. +// Arguments are handled in the manner of fmt.Println; a newline is appended if missing. +func Warningln(args ...interface{}) { + logging.println(warningLog, args...) +} + +// Warningf logs to the WARNING and INFO logs. +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. 
+func Warningf(format string, args ...interface{}) { + logging.printf(warningLog, format, args...) +} + +// Error logs to the ERROR, WARNING, and INFO logs. +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Error(args ...interface{}) { + logging.print(errorLog, args...) +} + +// ErrorDepth acts as Error but uses depth to determine which call frame to log. +// ErrorDepth(0, "msg") is the same as Error("msg"). +func ErrorDepth(depth int, args ...interface{}) { + logging.printDepth(errorLog, depth, args...) +} + +// Errorln logs to the ERROR, WARNING, and INFO logs. +// Arguments are handled in the manner of fmt.Println; a newline is appended if missing. +func Errorln(args ...interface{}) { + logging.println(errorLog, args...) +} + +// Errorf logs to the ERROR, WARNING, and INFO logs. +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. +func Errorf(format string, args ...interface{}) { + logging.printf(errorLog, format, args...) +} + +// Fatal logs to the FATAL, ERROR, WARNING, and INFO logs, +// including a stack trace of all running goroutines, then calls os.Exit(255). +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Fatal(args ...interface{}) { + logging.print(fatalLog, args...) +} + +// FatalDepth acts as Fatal but uses depth to determine which call frame to log. +// FatalDepth(0, "msg") is the same as Fatal("msg"). +func FatalDepth(depth int, args ...interface{}) { + logging.printDepth(fatalLog, depth, args...) +} + +// Fatalln logs to the FATAL, ERROR, WARNING, and INFO logs, +// including a stack trace of all running goroutines, then calls os.Exit(255). +// Arguments are handled in the manner of fmt.Println; a newline is appended if missing. +func Fatalln(args ...interface{}) { + logging.println(fatalLog, args...) 
+} + +// Fatalf logs to the FATAL, ERROR, WARNING, and INFO logs, +// including a stack trace of all running goroutines, then calls os.Exit(255). +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. +func Fatalf(format string, args ...interface{}) { + logging.printf(fatalLog, format, args...) +} + +// fatalNoStacks is non-zero if we are to exit without dumping goroutine stacks. +// It allows Exit and relatives to use the Fatal logs. +var fatalNoStacks uint32 + +// Exit logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1). +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Exit(args ...interface{}) { + atomic.StoreUint32(&fatalNoStacks, 1) + logging.print(fatalLog, args...) +} + +// ExitDepth acts as Exit but uses depth to determine which call frame to log. +// ExitDepth(0, "msg") is the same as Exit("msg"). +func ExitDepth(depth int, args ...interface{}) { + atomic.StoreUint32(&fatalNoStacks, 1) + logging.printDepth(fatalLog, depth, args...) +} + +// Exitln logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1). +func Exitln(args ...interface{}) { + atomic.StoreUint32(&fatalNoStacks, 1) + logging.println(fatalLog, args...) +} + +// Exitf logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1). +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. +func Exitf(format string, args ...interface{}) { + atomic.StoreUint32(&fatalNoStacks, 1) + logging.printf(fatalLog, format, args...) +} diff --git a/vendor/github.com/golang/glog/glog_file.go b/vendor/github.com/golang/glog/glog_file.go new file mode 100644 index 0000000000..65075d2811 --- /dev/null +++ b/vendor/github.com/golang/glog/glog_file.go @@ -0,0 +1,124 @@ +// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/ +// +// Copyright 2013 Google Inc. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// File I/O for logs. + +package glog + +import ( + "errors" + "flag" + "fmt" + "os" + "os/user" + "path/filepath" + "strings" + "sync" + "time" +) + +// MaxSize is the maximum size of a log file in bytes. +var MaxSize uint64 = 1024 * 1024 * 1800 + +// logDirs lists the candidate directories for new log files. +var logDirs []string + +// If non-empty, overrides the choice of directory in which to write logs. +// See createLogDirs for the full list of possible destinations. +var logDir = flag.String("log_dir", "", "If non-empty, write log files in this directory") + +func createLogDirs() { + if *logDir != "" { + logDirs = append(logDirs, *logDir) + } + logDirs = append(logDirs, os.TempDir()) +} + +var ( + pid = os.Getpid() + program = filepath.Base(os.Args[0]) + host = "unknownhost" + userName = "unknownuser" +) + +func init() { + h, err := os.Hostname() + if err == nil { + host = shortHostname(h) + } + + current, err := user.Current() + if err == nil { + userName = current.Username + } + + // Sanitize userName since it may contain filepath separators on Windows. + userName = strings.Replace(userName, `\`, "_", -1) +} + +// shortHostname returns its argument, truncating at the first period. +// For instance, given "www.google.com" it returns "www". 
+func shortHostname(hostname string) string { + if i := strings.Index(hostname, "."); i >= 0 { + return hostname[:i] + } + return hostname +} + +// logName returns a new log file name containing tag, with start time t, and +// the name for the symlink for tag. +func logName(tag string, t time.Time) (name, link string) { + name = fmt.Sprintf("%s.%s.%s.log.%s.%04d%02d%02d-%02d%02d%02d.%d", + program, + host, + userName, + tag, + t.Year(), + t.Month(), + t.Day(), + t.Hour(), + t.Minute(), + t.Second(), + pid) + return name, program + "." + tag +} + +var onceLogDirs sync.Once + +// create creates a new log file and returns the file and its filename, which +// contains tag ("INFO", "FATAL", etc.) and t. If the file is created +// successfully, create also attempts to update the symlink for that tag, ignoring +// errors. +func create(tag string, t time.Time) (f *os.File, filename string, err error) { + onceLogDirs.Do(createLogDirs) + if len(logDirs) == 0 { + return nil, "", errors.New("log: no log dirs") + } + name, link := logName(tag, t) + var lastErr error + for _, dir := range logDirs { + fname := filepath.Join(dir, name) + f, err := os.Create(fname) + if err == nil { + symlink := filepath.Join(dir, link) + os.Remove(symlink) // ignore err + os.Symlink(name, symlink) // ignore err + return f, fname, nil + } + lastErr = err + } + return nil, "", fmt.Errorf("log: cannot create log: %v", lastErr) +} diff --git a/vendor/github.com/prometheus/prometheus/model/labels/regexp.go b/vendor/github.com/prometheus/prometheus/model/labels/regexp.go index e1081f80ae..980bd73324 100644 --- a/vendor/github.com/prometheus/prometheus/model/labels/regexp.go +++ b/vendor/github.com/prometheus/prometheus/model/labels/regexp.go @@ -15,10 +15,12 @@ package labels import ( "strings" + "time" + "github.com/DmitriyVTitov/size" + "github.com/dgraph-io/ristretto" "github.com/grafana/regexp" "github.com/grafana/regexp/syntax" - lru "github.com/hashicorp/golang-lru/v2" ) const ( @@ -29,14 +31,22 
@@ const ( // to match values instead of iterating over a list. This value has // been computed running BenchmarkOptimizeEqualStringMatchers. minEqualMultiStringMatcherMapThreshold = 16 + + fastRegexMatcherCacheMaxSizeBytes = 1024 * 1024 * 1024 // 1GB + fastRegexMatcherCacheTTL = 5 * time.Minute ) -var fastRegexMatcherCache *lru.Cache[string, *FastRegexMatcher] +var fastRegexMatcherCache *ristretto.Cache func init() { - // Ignore error because it can only return error if size is invalid, - // but we're using an hardcoded size here. - fastRegexMatcherCache, _ = lru.New[string, *FastRegexMatcher](10000) + // Ignore error because it can only return error if config is invalid, + // but we're using an hardcoded static config here. + fastRegexMatcherCache, _ = ristretto.NewCache(&ristretto.Config{ + NumCounters: 100_000, // 10x the max number of expected items (takes 3 bytes per counter), + MaxCost: fastRegexMatcherCacheMaxSizeBytes, + BufferItems: 64, // Recommended default per the Config docs, + Metrics: false, + }) } type FastRegexMatcher struct { @@ -58,7 +68,7 @@ type FastRegexMatcher struct { func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) { // Check the cache. if matcher, ok := fastRegexMatcherCache.Get(v); ok { - return matcher, nil + return matcher.(*FastRegexMatcher), nil } // Create a new matcher. @@ -68,7 +78,7 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) { } // Cache it. - fastRegexMatcherCache.Add(v, matcher) + fastRegexMatcherCache.SetWithTTL(v, matcher, int64(size.Of(matcher)), fastRegexMatcherCacheTTL) return matcher, nil } diff --git a/vendor/github.com/prometheus/prometheus/util/testutil/testing.go b/vendor/github.com/prometheus/prometheus/util/testutil/testing.go index 31e0ee9bcd..c455bbdbe9 100644 --- a/vendor/github.com/prometheus/prometheus/util/testutil/testing.go +++ b/vendor/github.com/prometheus/prometheus/util/testutil/testing.go @@ -40,5 +40,9 @@ func TolerantVerifyLeak(m *testing.M) { // positives. 
// https://github.com/kubernetes/client-go/blob/f6ce18ae578c8cca64d14ab9687824d9e1305a67/util/workqueue/queue.go#L201 goleak.IgnoreTopFunction("k8s.io/client-go/util/workqueue.(*Type).updateUnfinishedWorkLoop"), + // Ignore "ristretto" and its dependency "glog". + goleak.IgnoreTopFunction("github.com/dgraph-io/ristretto.(*defaultPolicy).processItems"), + goleak.IgnoreTopFunction("github.com/dgraph-io/ristretto.(*Cache).processItems"), + goleak.IgnoreTopFunction("github.com/golang/glog.(*loggingT).flushDaemon"), ) } diff --git a/vendor/modules.txt b/vendor/modules.txt index a39c4673bf..be5fc056f2 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -88,6 +88,9 @@ github.com/AzureAD/microsoft-authentication-library-for-go/apps/internal/options github.com/AzureAD/microsoft-authentication-library-for-go/apps/internal/shared github.com/AzureAD/microsoft-authentication-library-for-go/apps/internal/version github.com/AzureAD/microsoft-authentication-library-for-go/apps/public +# github.com/DmitriyVTitov/size v1.5.0 +## explicit; go 1.14 +github.com/DmitriyVTitov/size # github.com/HdrHistogram/hdrhistogram-go v1.1.2 ## explicit; go 1.14 # github.com/alecthomas/chroma v0.10.0 @@ -300,6 +303,11 @@ github.com/davecgh/go-spew/spew # github.com/dennwc/varint v1.0.0 ## explicit; go 1.12 github.com/dennwc/varint +# github.com/dgraph-io/ristretto v0.1.1 +## explicit; go 1.12 +github.com/dgraph-io/ristretto +github.com/dgraph-io/ristretto/z +github.com/dgraph-io/ristretto/z/simd # github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f ## explicit github.com/dgryski/go-rendezvous @@ -435,6 +443,9 @@ github.com/gogo/status # github.com/golang-jwt/jwt/v4 v4.5.0 ## explicit; go 1.16 github.com/golang-jwt/jwt/v4 +# github.com/golang/glog v1.0.0 +## explicit; go 1.11 +github.com/golang/glog # github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da ## explicit github.com/golang/groupcache/lru @@ -833,7 +844,7 @@ github.com/prometheus/exporter-toolkit/web 
github.com/prometheus/procfs github.com/prometheus/procfs/internal/fs github.com/prometheus/procfs/internal/util -# github.com/prometheus/prometheus v1.8.2-0.20220620125440-d7e7b8e04b5e => github.com/grafana/mimir-prometheus v0.0.0-20230413082406-8ef48ad9a7f0 +# github.com/prometheus/prometheus v1.8.2-0.20220620125440-d7e7b8e04b5e => github.com/grafana/mimir-prometheus v0.0.0-20230417132058-c461e223418b ## explicit; go 1.19 github.com/prometheus/prometheus/config github.com/prometheus/prometheus/discovery @@ -1422,7 +1433,7 @@ sigs.k8s.io/kustomize/kyaml/yaml/walk # sigs.k8s.io/yaml v1.3.0 ## explicit; go 1.12 sigs.k8s.io/yaml -# github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20230413082406-8ef48ad9a7f0 +# github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20230417132058-c461e223418b # github.com/hashicorp/memberlist => github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe # google.golang.org/grpc => google.golang.org/grpc v1.47.0 # gopkg.in/yaml.v3 => github.com/colega/go-yaml-yaml v0.0.0-20220720105220-255a8d16d094