From 5e4b0081065925ab9d04009cd4fb559c4cceb304 Mon Sep 17 00:00:00 2001 From: Anthony Romano Date: Thu, 16 Mar 2017 20:17:27 -0700 Subject: [PATCH 1/2] *: base initial mmap size on quota size --- etcdctl/ctlv3/command/migrate_command.go | 2 +- etcdserver/server.go | 14 ++++++- lease/lessor_test.go | 5 ++- mvcc/backend/backend.go | 51 +++++++++++++++++++----- mvcc/backend/backend_bench_test.go | 4 +- mvcc/backend/backend_test.go | 4 +- mvcc/backend/boltoption_linux.go | 3 +- tools/benchmark/cmd/mvcc.go | 4 +- 8 files changed, 65 insertions(+), 22 deletions(-) diff --git a/etcdctl/ctlv3/command/migrate_command.go b/etcdctl/ctlv3/command/migrate_command.go index ea17476d994..634ebb94488 100644 --- a/etcdctl/ctlv3/command/migrate_command.go +++ b/etcdctl/ctlv3/command/migrate_command.go @@ -106,7 +106,7 @@ func prepareBackend() backend.Backend { dbpath := filepath.Join(migrateDatadir, "member", "snap", "db") go func() { defer close(bch) - be = backend.New(dbpath, time.Second, 10000) + be = backend.NewDefaultBackend(dbpath) }() select { diff --git a/etcdserver/server.go b/etcdserver/server.go index 7a272a0f74f..63e751ea48a 100644 --- a/etcdserver/server.go +++ b/etcdserver/server.go @@ -270,7 +270,7 @@ func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) { var be backend.Backend beOpened := make(chan struct{}) go func() { - be = backend.NewDefaultBackend(bepath) + be = newBackend(bepath, cfg.QuotaBackendBytes) beOpened <- struct{}{} }() @@ -809,7 +809,7 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, apply *apply) { plog.Panicf("rename snapshot file error: %v", err) } - newbe := backend.NewDefaultBackend(fn) + newbe := newBackend(fn, s.Cfg.QuotaBackendBytes) // always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases. // If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers. @@ -1653,3 +1653,13 @@ func (s *EtcdServer) goAttach(f func()) { f() }() } + +func newBackend(path string, quotaBytes int64) backend.Backend { + bcfg := backend.DefaultBackendConfig() + bcfg.Path = path + if quotaBytes > 0 && quotaBytes != DefaultQuotaBytes { + // permit 10% excess over quota for disarm + bcfg.MmapSize = uint64(quotaBytes + quotaBytes/10) + } + return backend.New(bcfg) +} diff --git a/lease/lessor_test.go b/lease/lessor_test.go index d417c15f771..39d9eea45c8 100644 --- a/lease/lessor_test.go +++ b/lease/lessor_test.go @@ -390,6 +390,7 @@ func NewTestBackend(t *testing.T) (string, backend.Backend) { if err != nil { t.Fatalf("failed to create tmpdir (%v)", err) } - - return tmpPath, backend.New(filepath.Join(tmpPath, "be"), time.Second, 10000) + bcfg := backend.DefaultBackendConfig() + bcfg.Path = filepath.Join(tmpPath, "be") + return tmpPath, backend.New(bcfg) } diff --git a/mvcc/backend/backend.go b/mvcc/backend/backend.go index 56a1b69e126..0a559295cee 100644 --- a/mvcc/backend/backend.go +++ b/mvcc/backend/backend.go @@ -35,10 +35,10 @@ var ( defragLimit = 10000 - // InitialMmapSize is the initial size of the mmapped region. Setting this larger than + // initialMmapSize is the initial size of the mmapped region. Setting this larger than // the potential max db size can prevent writer from blocking reader. // This only works for linux. - InitialMmapSize = int64(10 * 1024 * 1024 * 1024) + initialMmapSize = uint64(10 * 1024 * 1024 * 1024) plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "mvcc/backend") ) @@ -96,18 +96,45 @@ type backend struct { donec chan struct{} } -func New(path string, d time.Duration, limit int) Backend { - return newBackend(path, d, limit) +type BackendConfig struct { + // Path is the file path to the backend file. + Path string + // BatchInterval is the maximum time before flushing the BatchTx. + BatchInterval time.Duration + // BatchLimit is the maximum puts before flushing the BatchTx. + BatchLimit int + // MmapSize is the number of bytes to mmap for the backend. + MmapSize uint64 +} + +func DefaultBackendConfig() BackendConfig { + return BackendConfig{ + BatchInterval: defaultBatchInterval, + BatchLimit: defaultBatchLimit, + MmapSize: initialMmapSize, + } +} + +func New(bcfg BackendConfig) Backend { + return newBackend(bcfg) } func NewDefaultBackend(path string) Backend { - return newBackend(path, defaultBatchInterval, defaultBatchLimit) + bcfg := DefaultBackendConfig() + bcfg.Path = path + return newBackend(bcfg) } -func newBackend(path string, d time.Duration, limit int) *backend { - db, err := bolt.Open(path, 0600, boltOpenOptions) +func newBackend(bcfg BackendConfig) *backend { + bopts := &bolt.Options{} + if boltOpenOptions != nil { + *bopts = *boltOpenOptions + } + bopts.InitialMmapSize = int(bcfg.MmapSize) + + db, err := bolt.Open(bcfg.Path, 0600, bopts) if err != nil { - plog.Panicf("cannot open database at %s (%v)", path, err) + plog.Panicf("cannot open database at %s (%v)", bcfg.Path, err) } // In future, may want to make buffering optional for low-concurrency systems @@ -115,8 +142,8 @@ func newBackend(path string, d time.Duration, limit int) *backend { b := &backend{ db: db, - batchInterval: d, - batchLimit: limit, + batchInterval: bcfg.BatchInterval, + batchLimit: bcfg.BatchLimit, readTx: &readTx{buf: txReadBuffer{ txBuffer: txBuffer{make(map[string]*bucketBuffer)}}, @@ -358,7 +385,9 @@ func NewTmpBackend(batchInterval time.Duration, batchLimit int) (*backend, strin plog.Fatal(err) } tmpPath := filepath.Join(dir, "database") - return newBackend(tmpPath, batchInterval, batchLimit), tmpPath + bcfg := DefaultBackendConfig() + bcfg.Path, bcfg.BatchInterval, bcfg.BatchLimit = tmpPath, batchInterval, batchLimit + return newBackend(bcfg), tmpPath } func NewDefaultTmpBackend() (*backend, string) { diff --git a/mvcc/backend/backend_bench_test.go b/mvcc/backend/backend_bench_test.go index 6d2570e67b0..30b47516f15 100644 --- a/mvcc/backend/backend_bench_test.go +++ b/mvcc/backend/backend_bench_test.go @@ -22,9 +22,9 @@ import ( ) func BenchmarkBackendPut(b *testing.B) { - backend := New("test", 100*time.Millisecond, 10000) + backend, tmppath := NewTmpBackend(100*time.Millisecond, 10000) defer backend.Close() - defer os.Remove("test") + defer os.Remove(tmppath) // prepare keys keys := make([][]byte, b.N) diff --git a/mvcc/backend/backend_test.go b/mvcc/backend/backend_test.go index 68d0b19599e..af898b5ad3a 100644 --- a/mvcc/backend/backend_test.go +++ b/mvcc/backend/backend_test.go @@ -69,7 +69,9 @@ func TestBackendSnapshot(t *testing.T) { f.Close() // bootstrap new backend from the snapshot - nb := New(f.Name(), time.Hour, 10000) + bcfg := DefaultBackendConfig() + bcfg.Path, bcfg.BatchInterval, bcfg.BatchLimit = f.Name(), time.Hour, 10000 + nb := New(bcfg) defer cleanup(nb, f.Name()) newTx := b.BatchTx() diff --git a/mvcc/backend/boltoption_linux.go b/mvcc/backend/boltoption_linux.go index 4ee9b05a77c..c65b477a0d9 100644 --- a/mvcc/backend/boltoption_linux.go +++ b/mvcc/backend/boltoption_linux.go @@ -27,6 +27,5 @@ import ( // (https://github.com/torvalds/linux/releases/tag/v2.6.23), mmap might // silently ignore this flag. Please update your kernel to prevent this. var boltOpenOptions = &bolt.Options{ - MmapFlags: syscall.MAP_POPULATE, - InitialMmapSize: int(InitialMmapSize), + MmapFlags: syscall.MAP_POPULATE, } diff --git a/tools/benchmark/cmd/mvcc.go b/tools/benchmark/cmd/mvcc.go index e1a157be6ae..446ffed5f34 100644 --- a/tools/benchmark/cmd/mvcc.go +++ b/tools/benchmark/cmd/mvcc.go @@ -32,7 +32,9 @@ var ( ) func initMVCC() { - be := backend.New("mvcc-bench", time.Duration(batchInterval), batchLimit) + bcfg := backend.DefaultBackendConfig() + bcfg.Path, bcfg.BatchInterval, bcfg.BatchLimit = "mvcc-bench", time.Duration(batchInterval)*time.Millisecond, batchLimit + be := backend.New(bcfg) s = mvcc.NewStore(be, &lease.FakeLessor{}, nil) os.Remove("mvcc-bench") // boltDB has an opened fd, so removing the file is ok } From 8a3fee15a356a2bd7db40a909abe6e2b1d7ffb45 Mon Sep 17 00:00:00 2001 From: Anthony Romano Date: Thu, 16 Mar 2017 20:20:49 -0700 Subject: [PATCH 2/2] etcdserver, backend: only warn if exceeding max quota --- etcdserver/quota.go | 17 ++++++++++++----- mvcc/backend/backend.go | 9 --------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/etcdserver/quota.go b/etcdserver/quota.go index 088a4696253..87126f1564c 100644 --- a/etcdserver/quota.go +++ b/etcdserver/quota.go @@ -16,7 +16,15 @@ package etcdserver import ( pb "github.com/coreos/etcd/etcdserver/etcdserverpb" - "github.com/coreos/etcd/mvcc/backend" +) + +const ( + // DefaultQuotaBytes is the number of bytes the backend Size may + // consume before exceeding the space quota. + DefaultQuotaBytes = int64(2 * 1024 * 1024 * 1024) // 2GB + // MaxQuotaBytes is the maximum number of bytes suggested for a backend + // quota. A larger quota may lead to degraded performance. + MaxQuotaBytes = int64(8 * 1024 * 1024 * 1024) // 8GB ) // Quota represents an arbitrary quota against arbitrary requests. Each request @@ -57,11 +65,10 @@ func NewBackendQuota(s *EtcdServer) Quota { } if s.Cfg.QuotaBackendBytes == 0 { // use default size if no quota size given - return &backendQuota{s, backend.DefaultQuotaBytes} + return &backendQuota{s, DefaultQuotaBytes} } - if s.Cfg.QuotaBackendBytes > backend.MaxQuotaBytes { - plog.Warningf("backend quota %v exceeds maximum quota %v; using maximum", s.Cfg.QuotaBackendBytes, backend.MaxQuotaBytes) - return &backendQuota{s, backend.MaxQuotaBytes} + if s.Cfg.QuotaBackendBytes > MaxQuotaBytes { + plog.Warningf("backend quota %v exceeds maximum recommended quota %v", s.Cfg.QuotaBackendBytes, MaxQuotaBytes) } return &backendQuota{s, s.Cfg.QuotaBackendBytes} } diff --git a/mvcc/backend/backend.go b/mvcc/backend/backend.go index 0a559295cee..e70edb8f344 100644 --- a/mvcc/backend/backend.go +++ b/mvcc/backend/backend.go @@ -43,15 +43,6 @@ var ( plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "mvcc/backend") ) -const ( - // DefaultQuotaBytes is the number of bytes the backend Size may - // consume before exceeding the space quota. - DefaultQuotaBytes = int64(2 * 1024 * 1024 * 1024) // 2GB - // MaxQuotaBytes is the maximum number of bytes suggested for a backend - // quota. A larger quota may lead to degraded performance. - MaxQuotaBytes = int64(8 * 1024 * 1024 * 1024) // 8GB -) - type Backend interface { ReadTx() ReadTx BatchTx() BatchTx