Skip to content

Commit 3db2c82

Browse files
committed
Remove on-disk xidmap storage from the bulk loader. Throughput now peaks at 4M edges/sec on my machine, up from a maximum of 1M edges/sec.
1 parent 87c3674 commit 3db2c82

File tree

1 file changed

+1
-16
lines changed

1 file changed

+1
-16
lines changed

dgraph/cmd/bulk/loader.go

Lines changed: 1 addition & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,6 @@ import (
3030
"time"
3131

3232
"github.com/dgraph-io/badger"
33-
bo "github.com/dgraph-io/badger/options"
3433

3534
"github.com/dgraph-io/dgraph/chunker"
3635
"github.com/dgraph-io/dgraph/protos/pb"
@@ -81,7 +80,6 @@ type state struct {
8180
type loader struct {
8281
*state
8382
mappers []*mapper
84-
xidDB *badger.DB
8583
zero *grpc.ClientConn
8684
}
8785

@@ -147,19 +145,7 @@ func readSchema(filename string) []*pb.SchemaUpdate {
147145

148146
func (ld *loader) mapStage() {
149147
ld.prog.setPhase(mapPhase)
150-
151-
// TODO: Consider if we need to always store the XIDs in Badger. Things slow down if we do.
152-
xidDir := filepath.Join(ld.opt.TmpDir, "xids")
153-
x.Check(os.Mkdir(xidDir, 0755))
154-
opt := badger.DefaultOptions
155-
opt.SyncWrites = false
156-
opt.TableLoadingMode = bo.MemoryMap
157-
opt.Dir = xidDir
158-
opt.ValueDir = xidDir
159-
var err error
160-
ld.xidDB, err = badger.Open(opt)
161-
x.Check(err)
162-
ld.xids = xidmap.New(ld.zero, ld.xidDB)
148+
ld.xids = xidmap.New(ld.zero, nil)
163149

164150
files := x.FindDataFiles(ld.opt.DataFiles, []string{".rdf", ".rdf.gz", ".json", ".json.gz"})
165151
if len(files) == 0 {
@@ -224,7 +210,6 @@ func (ld *loader) mapStage() {
224210
ld.mappers[i] = nil
225211
}
226212
x.Check(ld.xids.Flush())
227-
x.Check(ld.xidDB.Close())
228213
ld.xids = nil
229214
runtime.GC()
230215
}

0 commit comments

Comments
 (0)