Skip to content

Commit b77971a

Browse files
author
Javier Alvarado
committed
Preserve UIDs in bulk load by default and add --new_uids option to override.
1 parent 248f8db commit b77971a

File tree

3 files changed

+18
-2
lines changed

3 files changed

+18
-2
lines changed

dgraph/cmd/bulk/loader.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@ type options struct {
5858
HttpAddr string
5959
IgnoreErrors bool
6060
CustomTokenizers string
61+
NewUids bool
6162

6263
MapShards int
6364
ReduceShards int

dgraph/cmd/bulk/mapper.go

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ import (
2626
"os"
2727
"path/filepath"
2828
"sort"
29+
"strconv"
2930
"strings"
3031
"sync"
3132
"sync/atomic"
@@ -185,11 +186,11 @@ func (m *mapper) addMapEntry(key []byte, p *pb.Posting, shard int) {
185186
}
186187

187188
func (m *mapper) processNQuad(nq gql.NQuad) {
188-
sid := m.lookupUid(nq.GetSubject())
189+
sid := m.uid(nq.GetSubject())
189190
var oid uint64
190191
var de *pb.DirectedEdge
191192
if nq.GetObjectValue() == nil {
192-
oid = m.lookupUid(nq.GetObjectId())
193+
oid = m.uid(nq.GetObjectId())
193194
de = nq.CreateUidEdge(sid, oid)
194195
} else {
195196
var err error
@@ -216,6 +217,17 @@ func (m *mapper) processNQuad(nq gql.NQuad) {
216217
}
217218
}
218219

220+
func (m *mapper) uid(xid string) uint64 {
221+
if !m.opt.NewUids {
222+
if uid, err := strconv.ParseUint(xid, 0, 64); err == nil {
223+
m.xids.BumpTo(uid)
224+
return uid
225+
}
226+
}
227+
228+
return m.lookupUid(xid)
229+
}
230+
219231
func (m *mapper) lookupUid(xid string) uint64 {
220232
uid := m.xids.AssignUid(xid)
221233
if !m.opt.StoreXids {

dgraph/cmd/bulk/run.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,8 @@ func init() {
9090
"more parallelism, but increases memory usage.")
9191
flag.String("custom_tokenizers", "",
9292
"Comma separated list of tokenizer plugins")
93+
flag.Bool("new_uids", false,
94+
"Ignore UIDs in load files and assign new ones.")
9395
}
9496

9597
func run() {
@@ -113,6 +115,7 @@ func run() {
113115
MapShards: Bulk.Conf.GetInt("map_shards"),
114116
ReduceShards: Bulk.Conf.GetInt("reduce_shards"),
115117
CustomTokenizers: Bulk.Conf.GetString("custom_tokenizers"),
118+
NewUids: Bulk.Conf.GetBool("new_uids"),
116119
}
117120

118121
x.PrintVersion()

0 commit comments

Comments
 (0)