Skip to content

Commit

Permalink
Merge pull request #1370 from ipfs/feat/badger-gc
Browse files Browse the repository at this point in the history
Fix #1320: Add automatic GC to Badger datastore
  • Loading branch information
hsanjuan committed Jul 1, 2021
2 parents f7a2e4a + 4ac2cf3 commit c9783c0
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 4 deletions.
5 changes: 5 additions & 0 deletions config/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ func SetIfNotDefault(src interface{}, dest interface{}) {
if n != 0 {
*dest.(*int) = n
}
case float64:
n := src.(float64)
if n != 0 {
*dest.(*float64) = n
}
case bool:
b := src.(bool)
if b {
Expand Down
2 changes: 2 additions & 0 deletions config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ var testingCrdtCfg = []byte(`{

var testingBadgerCfg = []byte(`{
"folder": "badgerFromTests",
"gc_interval": "0m",
"gc_sleep": "0m",
"badger_options": {
"max_table_size": 1048576
}
Expand Down
7 changes: 6 additions & 1 deletion datastore/badger/badger.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@ func New(cfg *Config) (ds.Datastore, error) {
if err != nil {
return nil, errors.Wrap(err, "creating badger folder")
}
opts := badgerds.Options{Options: cfg.BadgerOptions}
opts := badgerds.Options{
GcDiscardRatio: cfg.GCDiscardRatio,
GcInterval: cfg.GCInterval,
GcSleep: cfg.GCSleep,
Options: cfg.BadgerOptions,
}
return badgerds.NewDatastore(folder, &opts)
}

Expand Down
49 changes: 47 additions & 2 deletions datastore/badger/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"encoding/json"
"errors"
"path/filepath"
"time"

"github.com/dgraph-io/badger"
"github.com/dgraph-io/badger/options"
Expand All @@ -25,6 +26,13 @@ var (
// DefaultBadgerOptions has to be a var because badger.DefaultOptions
// is. Values are customized during Init().
DefaultBadgerOptions badger.Options

// DefaultGCDiscardRatio for GC operations. See Badger docs.
DefaultGCDiscardRatio float64 = 0.2
// DefaultGCInterval specifies interval between GC cycles.
DefaultGCInterval time.Duration = 15 * time.Minute
// DefaultGCSleep specifies sleep time between GC rounds.
DefaultGCSleep time.Duration = 10 * time.Second
)

func init() {
Expand All @@ -49,6 +57,16 @@ type Config struct {
// the base configuration folder.
Folder string

// For GC operation. See Badger documentation.
GCDiscardRatio float64

// Interval between GC cycles. Each GC cycle runs one or more
// rounds separated by GCSleep.
GCInterval time.Duration

// Time between rounds in a GC cycle
GCSleep time.Duration

BadgerOptions badger.Options
}

Expand Down Expand Up @@ -132,8 +150,11 @@ func (bo *badgerOptions) Marshal(badgerOpts *badger.Options) {
}

type jsonConfig struct {
Folder string `json:"folder,omitempty"`
BadgerOptions badgerOptions `json:"badger_options,omitempty"`
Folder string `json:"folder,omitempty"`
GCDiscardRatio float64 `json:"gc_discard_ratio"`
GCInterval string `json:"gc_interval"`
GCSleep string `json:"gc_sleep"`
BadgerOptions badgerOptions `json:"badger_options,omitempty"`
}

// ConfigKey returns a human-friendly identifier for this type of Datastore.
Expand All @@ -144,6 +165,9 @@ func (cfg *Config) ConfigKey() string {
// Default populates the folder, the GC parameters and the Badger options of
// this Config from the package-level default values. It always returns nil.
func (cfg *Config) Default() error {
	// Badger's own options first, then the GC tuning knobs, then the folder.
	cfg.BadgerOptions = DefaultBadgerOptions
	cfg.GCSleep = DefaultGCSleep
	cfg.GCInterval = DefaultGCInterval
	cfg.GCDiscardRatio = DefaultGCDiscardRatio
	cfg.Folder = DefaultSubFolder
	return nil
}
Expand All @@ -167,6 +191,10 @@ func (cfg *Config) Validate() error {
return errors.New("folder is unset")
}

if cfg.GCDiscardRatio <= 0 || cfg.GCDiscardRatio >= 1 {
return errors.New("gc_discard_ratio must be more than 0 and less than 1")
}

return nil
}

Expand All @@ -186,6 +214,19 @@ func (cfg *Config) LoadJSON(raw []byte) error {
func (cfg *Config) applyJSONConfig(jcfg *jsonConfig) error {
config.SetIfNotDefault(jcfg.Folder, &cfg.Folder)

// 0 is an invalid value anyway, so a zero (unset) ratio keeps the current value (default: 0.2).
config.SetIfNotDefault(jcfg.GCDiscardRatio, &cfg.GCDiscardRatio)

// If these durations are unset, the current (default) values are kept,
// so GC is enabled by default.
err := config.ParseDurations("badger",
&config.DurationOpt{Duration: jcfg.GCInterval, Dst: &cfg.GCInterval, Name: "gc_interval"},
&config.DurationOpt{Duration: jcfg.GCSleep, Dst: &cfg.GCSleep, Name: "gc_sleep"},
)
if err != nil {
return err
}

badgerOpts := jcfg.BadgerOptions.Unmarshal()

if err := mergo.Merge(&cfg.BadgerOptions, badgerOpts, mergo.WithOverride); err != nil {
Expand Down Expand Up @@ -219,6 +260,10 @@ func (cfg *Config) toJSONConfig() *jsonConfig {
jCfg.Folder = cfg.Folder
}

jCfg.GCDiscardRatio = cfg.GCDiscardRatio
jCfg.GCInterval = cfg.GCInterval.String()
jCfg.GCSleep = cfg.GCSleep.String()

bo := &badgerOptions{}
bo.Marshal(&cfg.BadgerOptions)
jCfg.BadgerOptions = *bo
Expand Down
28 changes: 28 additions & 0 deletions datastore/badger/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package badger

import (
"testing"
"time"

"github.com/dgraph-io/badger"
"github.com/dgraph-io/badger/options"
Expand All @@ -10,6 +11,8 @@ import (
var cfgJSON = []byte(`
{
"folder": "test",
"gc_discard_ratio": 0.1,
"gc_sleep": "2m",
"badger_options": {
"max_levels": 4,
"value_log_loading_mode": 0
Expand All @@ -29,6 +32,18 @@ func TestToJSON(t *testing.T) {
cfg := &Config{}
cfg.LoadJSON(cfgJSON)

if cfg.GCDiscardRatio != 0.1 {
t.Fatal("GCDiscardRatio should be 0.1")
}

if cfg.GCInterval != DefaultGCInterval {
t.Fatal("GCInterval should default as it is unset")
}

if cfg.GCSleep != 2*time.Minute {
t.Fatal("GCSleep should be 2m")
}

if cfg.BadgerOptions.ValueLogLoadingMode != options.FileIO {
t.Fatalf("got: %d, want: %d", cfg.BadgerOptions.ValueLogLoadingMode, options.FileIO)
}
Expand Down Expand Up @@ -60,3 +75,16 @@ func TestToJSON(t *testing.T) {
t.Fatal(err)
}
}

// TestDefault checks that a Config populated by Default() passes Validate()
// and that gc_discard_ratio values outside the open interval (0, 1) are
// rejected.
func TestDefault(t *testing.T) {
	cfg := &Config{}
	if err := cfg.Default(); err != nil {
		t.Fatalf("error setting defaults: %s", err)
	}
	if err := cfg.Validate(); err != nil {
		t.Fatalf("error validating: %s", err)
	}

	// Validate() requires 0 < GCDiscardRatio < 1, so both boundaries and
	// anything beyond them must be rejected.
	for _, ratio := range []float64{0, 1, -0.1, 1.5} {
		cfg.GCDiscardRatio = ratio
		if cfg.Validate() == nil {
			t.Fatalf("expected error validating gc_discard_ratio=%v", ratio)
		}
	}
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ require (
github.com/ipfs/go-block-format v0.0.3
github.com/ipfs/go-cid v0.0.7
github.com/ipfs/go-datastore v0.4.5
github.com/ipfs/go-ds-badger v0.2.6
github.com/ipfs/go-ds-badger v0.2.7
github.com/ipfs/go-ds-crdt v0.1.20
github.com/ipfs/go-ds-leveldb v0.4.2
github.com/ipfs/go-fs-lock v0.0.6
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,8 @@ github.com/ipfs/go-ds-badger v0.2.1/go.mod h1:Tx7l3aTph3FMFrRS838dcSJh+jjA7cX9Dr
github.com/ipfs/go-ds-badger v0.2.3/go.mod h1:pEYw0rgg3FIrywKKnL+Snr+w/LjJZVMTBRn4FS6UHUk=
github.com/ipfs/go-ds-badger v0.2.6 h1:Hy8jw4rifxtRDrqpvC1yh36oIyE37KDzsUzlHUPOFiU=
github.com/ipfs/go-ds-badger v0.2.6/go.mod h1:02rnztVKA4aZwDuaRPTf8mpqcKmXP7mLl6JPxd14JHA=
github.com/ipfs/go-ds-badger v0.2.7 h1:ju5REfIm+v+wgVnQ19xGLYPHYHbYLR6qJfmMbCDSK1I=
github.com/ipfs/go-ds-badger v0.2.7/go.mod h1:02rnztVKA4aZwDuaRPTf8mpqcKmXP7mLl6JPxd14JHA=
github.com/ipfs/go-ds-crdt v0.1.20 h1:4iJPmZSXq4/2gLOq0fVH3ROYDjw39vgdCyJF7akkdvE=
github.com/ipfs/go-ds-crdt v0.1.20/go.mod h1:1LiDiHfnunQ6UfilPCkgtlWTX8vWP9hiQt4Q5GK+jaE=
github.com/ipfs/go-ds-leveldb v0.0.1/go.mod h1:feO8V3kubwsEF22n0YRQCffeb79OOYIykR4L04tMOYc=
Expand Down

0 comments on commit c9783c0

Please sign in to comment.