Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #1320: Add automatic GC to Badger datastore #1370

Merged
merged 1 commit into from
Jul 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions config/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ func SetIfNotDefault(src interface{}, dest interface{}) {
if n != 0 {
*dest.(*int) = n
}
case float64:
n := src.(float64)
if n != 0 {
*dest.(*float64) = n
}
case bool:
b := src.(bool)
if b {
Expand Down
2 changes: 2 additions & 0 deletions config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ var testingCrdtCfg = []byte(`{

var testingBadgerCfg = []byte(`{
"folder": "badgerFromTests",
"gc_interval": "0m",
"gc_sleep": "0m",
"badger_options": {
"max_table_size": 1048576
}
Expand Down
7 changes: 6 additions & 1 deletion datastore/badger/badger.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@ func New(cfg *Config) (ds.Datastore, error) {
if err != nil {
return nil, errors.Wrap(err, "creating badger folder")
}
opts := badgerds.Options{Options: cfg.BadgerOptions}
opts := badgerds.Options{
GcDiscardRatio: cfg.GCDiscardRatio,
GcInterval: cfg.GCInterval,
GcSleep: cfg.GCSleep,
Options: cfg.BadgerOptions,
}
return badgerds.NewDatastore(folder, &opts)
}

Expand Down
49 changes: 47 additions & 2 deletions datastore/badger/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"encoding/json"
"errors"
"path/filepath"
"time"

"github.com/dgraph-io/badger"
"github.com/dgraph-io/badger/options"
Expand All @@ -25,6 +26,13 @@ var (
// DefaultBadgerOptions has to be a var because badger.DefaultOptions
// is. Values are customized during Init().
DefaultBadgerOptions badger.Options

// DefaultGCDiscardRatio for GC operations. See Badger docs.
DefaultGCDiscardRatio float64 = 0.2
// DefaultGCInterval specifies interval between GC cycles.
DefaultGCInterval time.Duration = 15 * time.Minute
// DefaultGCSleep specifies sleep time between GC rounds.
DefaultGCSleep time.Duration = 10 * time.Second
)

func init() {
Expand All @@ -49,6 +57,16 @@ type Config struct {
// the base configuration folder.
Folder string

// For GC operation. See Badger documentation.
GCDiscardRatio float64

// Interval between GC cycles. Each GC cycle runs one or more
// rounds separated by GCSleep.
GCInterval time.Duration

// Time between rounds in a GC cycle.
GCSleep time.Duration

BadgerOptions badger.Options
}

Expand Down Expand Up @@ -132,8 +150,11 @@ func (bo *badgerOptions) Marshal(badgerOpts *badger.Options) {
}

// jsonConfig is the JSON form of Config. The GC durations are carried as
// strings and converted to and from time.Duration when the configuration is
// loaded or saved.
type jsonConfig struct {
// NOTE(review): the next two lines declare Folder and BadgerOptions, which
// are declared again below. They look like the pre-change side of the diff;
// confirm that only one pair exists in the real file.
Folder string `json:"folder,omitempty"`
BadgerOptions badgerOptions `json:"badger_options,omitempty"`
Folder string `json:"folder,omitempty"`
// GCDiscardRatio is always emitted (no omitempty). A zero value is not
// applied on load, so the existing setting is kept.
GCDiscardRatio float64 `json:"gc_discard_ratio"`
// GCInterval is a duration string (for example "15m").
GCInterval string `json:"gc_interval"`
// GCSleep is a duration string (for example "10s").
GCSleep string `json:"gc_sleep"`
BadgerOptions badgerOptions `json:"badger_options,omitempty"`
}

// ConfigKey returns a human-friendly identifier for this type of Datastore.
Expand All @@ -144,6 +165,9 @@ func (cfg *Config) ConfigKey() string {
// Default fills in the folder, the garbage-collection settings and the
// Badger options of this Config from the package-level defaults. It always
// returns nil.
func (cfg *Config) Default() error {
	// Garbage-collection tuning.
	cfg.GCSleep = DefaultGCSleep
	cfg.GCInterval = DefaultGCInterval
	cfg.GCDiscardRatio = DefaultGCDiscardRatio

	// Folder and Badger's own options.
	cfg.BadgerOptions = DefaultBadgerOptions
	cfg.Folder = DefaultSubFolder

	return nil
}
Expand All @@ -167,6 +191,10 @@ func (cfg *Config) Validate() error {
return errors.New("folder is unset")
}

if cfg.GCDiscardRatio <= 0 || cfg.GCDiscardRatio >= 1 {
return errors.New("gc_discard_ratio must be more than 0 and less than 1")
}

return nil
}

Expand All @@ -186,6 +214,19 @@ func (cfg *Config) LoadJSON(raw []byte) error {
func (cfg *Config) applyJSONConfig(jcfg *jsonConfig) error {
config.SetIfNotDefault(jcfg.Folder, &cfg.Folder)

// 0 is an invalid value anyway, so a zero (or missing) gc_discard_ratio
// leaves the current value, normally the default (0.2), untouched.
config.SetIfNotDefault(jcfg.GCDiscardRatio, &cfg.GCDiscardRatio)

// If these durations are not set, the current (default) values are kept,
// so GC is enabled by default.
err := config.ParseDurations("badger",
&config.DurationOpt{Duration: jcfg.GCInterval, Dst: &cfg.GCInterval, Name: "gc_interval"},
&config.DurationOpt{Duration: jcfg.GCSleep, Dst: &cfg.GCSleep, Name: "gc_sleep"},
)
if err != nil {
return err
}

badgerOpts := jcfg.BadgerOptions.Unmarshal()

if err := mergo.Merge(&cfg.BadgerOptions, badgerOpts, mergo.WithOverride); err != nil {
Expand Down Expand Up @@ -219,6 +260,10 @@ func (cfg *Config) toJSONConfig() *jsonConfig {
jCfg.Folder = cfg.Folder
}

jCfg.GCDiscardRatio = cfg.GCDiscardRatio
jCfg.GCInterval = cfg.GCInterval.String()
jCfg.GCSleep = cfg.GCSleep.String()

bo := &badgerOptions{}
bo.Marshal(&cfg.BadgerOptions)
jCfg.BadgerOptions = *bo
Expand Down
28 changes: 28 additions & 0 deletions datastore/badger/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package badger

import (
"testing"
"time"

"github.com/dgraph-io/badger"
"github.com/dgraph-io/badger/options"
Expand All @@ -10,6 +11,8 @@ import (
var cfgJSON = []byte(`
{
"folder": "test",
"gc_discard_ratio": 0.1,
"gc_sleep": "2m",
"badger_options": {
"max_levels": 4,
"value_log_loading_mode": 0
Expand All @@ -29,6 +32,18 @@ func TestToJSON(t *testing.T) {
cfg := &Config{}
cfg.LoadJSON(cfgJSON)

if cfg.GCDiscardRatio != 0.1 {
t.Fatal("GCDiscardRatio should be 0.1")
}

if cfg.GCInterval != DefaultGCInterval {
t.Fatal("GCInterval should default as it is unset")
}

if cfg.GCSleep != 2*time.Minute {
t.Fatal("GCSleep should be 2m")
}

if cfg.BadgerOptions.ValueLogLoadingMode != options.FileIO {
t.Fatalf("got: %d, want: %d", cfg.BadgerOptions.ValueLogLoadingMode, options.FileIO)
}
Expand Down Expand Up @@ -60,3 +75,16 @@ func TestToJSON(t *testing.T) {
t.Fatal(err)
}
}

// TestDefault checks that a Config populated by Default() passes Validate,
// and that Validate rejects gc_discard_ratio values at either end of the
// open interval (0, 1).
func TestDefault(t *testing.T) {
	cfg := &Config{}
	// Do not drop the error: a failing Default() would otherwise surface
	// later as a confusing validation failure.
	if err := cfg.Default(); err != nil {
		t.Fatalf("setting defaults: %v", err)
	}
	if err := cfg.Validate(); err != nil {
		t.Fatalf("error validating: %v", err)
	}

	// Both bounds are exclusive, so 0 and 1 must each be rejected.
	for _, ratio := range []float64{0, 1} {
		cfg.GCDiscardRatio = ratio
		if cfg.Validate() == nil {
			t.Fatalf("expected error validating with gc_discard_ratio=%v", ratio)
		}
	}
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ require (
github.com/ipfs/go-block-format v0.0.3
github.com/ipfs/go-cid v0.0.7
github.com/ipfs/go-datastore v0.4.5
github.com/ipfs/go-ds-badger v0.2.6
github.com/ipfs/go-ds-badger v0.2.7
github.com/ipfs/go-ds-crdt v0.1.20
github.com/ipfs/go-ds-leveldb v0.4.2
github.com/ipfs/go-fs-lock v0.0.6
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,8 @@ github.com/ipfs/go-ds-badger v0.2.1/go.mod h1:Tx7l3aTph3FMFrRS838dcSJh+jjA7cX9Dr
github.com/ipfs/go-ds-badger v0.2.3/go.mod h1:pEYw0rgg3FIrywKKnL+Snr+w/LjJZVMTBRn4FS6UHUk=
github.com/ipfs/go-ds-badger v0.2.6 h1:Hy8jw4rifxtRDrqpvC1yh36oIyE37KDzsUzlHUPOFiU=
github.com/ipfs/go-ds-badger v0.2.6/go.mod h1:02rnztVKA4aZwDuaRPTf8mpqcKmXP7mLl6JPxd14JHA=
github.com/ipfs/go-ds-badger v0.2.7 h1:ju5REfIm+v+wgVnQ19xGLYPHYHbYLR6qJfmMbCDSK1I=
github.com/ipfs/go-ds-badger v0.2.7/go.mod h1:02rnztVKA4aZwDuaRPTf8mpqcKmXP7mLl6JPxd14JHA=
github.com/ipfs/go-ds-crdt v0.1.20 h1:4iJPmZSXq4/2gLOq0fVH3ROYDjw39vgdCyJF7akkdvE=
github.com/ipfs/go-ds-crdt v0.1.20/go.mod h1:1LiDiHfnunQ6UfilPCkgtlWTX8vWP9hiQt4Q5GK+jaE=
github.com/ipfs/go-ds-leveldb v0.0.1/go.mod h1:feO8V3kubwsEF22n0YRQCffeb79OOYIykR4L04tMOYc=
Expand Down