Skip to content

Commit 1725096

Browse files
authored
Speed up StreamWriter (#825)
Use a goroutine to split up the serial part of writing in StreamWriter to allow another core to work on the table encoding. On my desktop, this can write at the rate of 200MBps (1.6Gbps), finishing 1B keys (16B keys, 16B values) in around 3m20s.

Changes:
* Use a goroutine to split up the serial part of writing in StreamWriter to speed things up.
* Limit to 3 pending requests at a time.
* Use as many goroutines to process requests as the number of streams.
* Update badger.fill tool to send writes to streamwriter concurrently.
* Do batching based on size instead of count.
* Set the value log head correctly.
1 parent e9447c9 commit 1725096

File tree

2 files changed

+129
-57
lines changed

2 files changed

+129
-57
lines changed

badger/cmd/fill.go

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"encoding/binary"
2121
"log"
2222
"math/rand"
23+
"sync"
2324
"time"
2425

2526
"github.com/dgraph-io/badger"
@@ -79,27 +80,56 @@ func fillSorted(db *badger.DB, num uint64) error {
7980
if err := writer.Prepare(); err != nil {
8081
return err
8182
}
82-
kvs := &pb.KVList{}
83-
for i := uint64(1); i <= num; i++ {
84-
key := make([]byte, 8)
85-
binary.BigEndian.PutUint64(key, i)
86-
kvs.Kv = append(kvs.Kv, &pb.KV{
87-
Key: key,
88-
Value: value,
89-
Version: 1,
90-
})
91-
if len(kvs.Kv) > 1000 {
92-
if err := writer.Write(kvs); err != nil {
93-
return err
83+
84+
wg := &sync.WaitGroup{}
85+
writeCh := make(chan *pb.KVList, 3)
86+
writeRange := func(start, end uint64, streamId uint32) {
87+
// end is not included.
88+
defer wg.Done()
89+
kvs := &pb.KVList{}
90+
var sz int
91+
for i := start; i < end; i++ {
92+
key := make([]byte, 8)
93+
binary.BigEndian.PutUint64(key, i)
94+
kvs.Kv = append(kvs.Kv, &pb.KV{
95+
Key: key,
96+
Value: value,
97+
Version: 1,
98+
StreamId: streamId,
99+
})
100+
sz += len(key) + len(value)
101+
if sz >= 4<<20 { // 4 MB
102+
writeCh <- kvs
103+
kvs = &pb.KVList{}
104+
sz = 0
94105
}
95-
kvs = &pb.KVList{}
96106
}
107+
writeCh <- kvs
97108
}
98-
if len(kvs.Kv) > 0 {
109+
110+
// Let's create some streams.
111+
width := num / 16
112+
streamId := uint32(0)
113+
for start := uint64(0); start < num; start += width {
114+
end := start + width
115+
if end > num {
116+
end = num
117+
}
118+
streamId++
119+
wg.Add(1)
120+
go writeRange(start, end, streamId)
121+
}
122+
go func() {
123+
wg.Wait()
124+
close(writeCh)
125+
}()
126+
log.Printf("Max StreamId used: %d. Width: %d\n", streamId, width)
127+
for kvs := range writeCh {
99128
if err := writer.Write(kvs); err != nil {
100-
return err
129+
panic(err)
101130
}
102131
}
132+
log.Println("DONE streaming. Flushing...")
103133
return writer.Flush()
104134
}
105135

stream_writer.go

Lines changed: 84 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ type StreamWriter struct {
4444
db *DB
4545
done func()
4646
throttle *y.Throttle
47-
head valuePointer
4847
maxVersion uint64
4948
writers map[uint32]*sortedWriter
49+
closer *y.Closer
5050
}
5151

5252
// NewStreamWriter creates a StreamWriter. Right after creating StreamWriter, Prepare must be
@@ -60,6 +60,7 @@ func (db *DB) NewStreamWriter() *StreamWriter {
6060
// concurrent streams being processed.
6161
throttle: y.NewThrottle(16),
6262
writers: make(map[uint32]*sortedWriter),
63+
closer: y.NewCloser(0),
6364
}
6465
}
6566

@@ -74,9 +75,12 @@ func (sw *StreamWriter) Prepare() error {
7475
}
7576

7677
// Write writes KVList to DB. Each KV within the list contains the stream id which StreamWriter
77-
// would use to demux the writes.
78+
// would use to demux the writes. Write is not thread safe and it should NOT be called concurrently.
7879
func (sw *StreamWriter) Write(kvs *pb.KVList) error {
79-
var entries []*Entry
80+
if len(kvs.GetKv()) == 0 {
81+
return nil
82+
}
83+
streamReqs := make(map[uint32]*request)
8084
for _, kv := range kvs.Kv {
8185
var meta, userMeta byte
8286
if len(kv.Meta) > 0 {
@@ -98,50 +102,28 @@ func (sw *StreamWriter) Write(kvs *pb.KVList) error {
98102
// If the value can be colocated with the key in LSM tree, we can skip
99103
// writing the value to value log.
100104
e.skipVlog = sw.db.shouldWriteValueToLSM(*e)
101-
entries = append(entries, e)
105+
req := streamReqs[kv.StreamId]
106+
if req == nil {
107+
req = &request{}
108+
streamReqs[kv.StreamId] = req
109+
}
110+
req.Entries = append(req.Entries, e)
102111
}
103-
req := &request{
104-
Entries: entries,
112+
var all []*request
113+
for _, req := range streamReqs {
114+
all = append(all, req)
105115
}
106-
y.AssertTrue(len(kvs.Kv) == len(req.Entries))
107-
if err := sw.db.vlog.write([]*request{req}); err != nil {
116+
if err := sw.db.vlog.write(all); err != nil {
108117
return err
109118
}
110119

111-
for i, kv := range kvs.Kv {
112-
e := req.Entries[i]
113-
vptr := req.Ptrs[i]
114-
if !vptr.IsZero() {
115-
y.AssertTrue(sw.head.Less(vptr))
116-
sw.head = vptr
117-
}
118-
119-
writer, ok := sw.writers[kv.StreamId]
120+
for streamId, req := range streamReqs {
121+
writer, ok := sw.writers[streamId]
120122
if !ok {
121-
writer = sw.newWriter(kv.StreamId)
122-
sw.writers[kv.StreamId] = writer
123-
}
124-
125-
var vs y.ValueStruct
126-
if e.skipVlog {
127-
vs = y.ValueStruct{
128-
Value: e.Value,
129-
Meta: e.meta,
130-
UserMeta: e.UserMeta,
131-
ExpiresAt: e.ExpiresAt,
132-
}
133-
} else {
134-
vbuf := make([]byte, vptrSize)
135-
vs = y.ValueStruct{
136-
Value: vptr.Encode(vbuf),
137-
Meta: e.meta | bitValuePointer,
138-
UserMeta: e.UserMeta,
139-
ExpiresAt: e.ExpiresAt,
140-
}
141-
}
142-
if err := writer.Add(e.Key, vs); err != nil {
143-
return err
123+
writer = sw.newWriter(streamId)
124+
sw.writers[streamId] = writer
144125
}
126+
writer.reqCh <- req
145127
}
146128
return nil
147129
}
@@ -150,15 +132,21 @@ func (sw *StreamWriter) Write(kvs *pb.KVList) error {
150132
// updates Oracle with maxVersion found in all entries (if DB is not managed).
151133
func (sw *StreamWriter) Flush() error {
152134
defer sw.done()
135+
136+
sw.closer.SignalAndWait()
137+
var maxHead valuePointer
153138
for _, writer := range sw.writers {
154139
if err := writer.Done(); err != nil {
155140
return err
156141
}
142+
if maxHead.Less(writer.head) {
143+
maxHead = writer.head
144+
}
157145
}
158146

159147
// Encode and write the value log head into a new table.
160148
data := make([]byte, vptrSize)
161-
sw.head.Encode(data)
149+
maxHead.Encode(data)
162150
headWriter := sw.newWriter(headStreamId)
163151
if err := headWriter.Add(
164152
y.KeyWithTs(head, sw.maxVersion),
@@ -198,20 +186,74 @@ type sortedWriter struct {
198186
builder *table.Builder
199187
lastKey []byte
200188
streamId uint32
189+
reqCh chan *request
190+
head valuePointer
201191
}
202192

203193
func (sw *StreamWriter) newWriter(streamId uint32) *sortedWriter {
204-
return &sortedWriter{
194+
w := &sortedWriter{
205195
db: sw.db,
206196
streamId: streamId,
207197
throttle: sw.throttle,
208198
builder: table.NewTableBuilder(),
199+
reqCh: make(chan *request, 3),
209200
}
201+
sw.closer.AddRunning(1)
202+
go w.handleRequests(sw.closer)
203+
return w
210204
}
211205

212206
// ErrUnsortedKey is returned when any out of order key arrives at sortedWriter during call to Add.
213207
var ErrUnsortedKey = errors.New("Keys not in sorted order")
214208

209+
func (w *sortedWriter) handleRequests(closer *y.Closer) {
210+
defer closer.Done()
211+
212+
process := func(req *request) {
213+
for i, e := range req.Entries {
214+
vptr := req.Ptrs[i]
215+
if !vptr.IsZero() {
216+
y.AssertTrue(w.head.Less(vptr))
217+
w.head = vptr
218+
}
219+
220+
var vs y.ValueStruct
221+
if e.skipVlog {
222+
vs = y.ValueStruct{
223+
Value: e.Value,
224+
Meta: e.meta,
225+
UserMeta: e.UserMeta,
226+
ExpiresAt: e.ExpiresAt,
227+
}
228+
} else {
229+
vbuf := make([]byte, vptrSize)
230+
vs = y.ValueStruct{
231+
Value: vptr.Encode(vbuf),
232+
Meta: e.meta | bitValuePointer,
233+
UserMeta: e.UserMeta,
234+
ExpiresAt: e.ExpiresAt,
235+
}
236+
}
237+
if err := w.Add(e.Key, vs); err != nil {
238+
panic(err)
239+
}
240+
}
241+
}
242+
243+
for {
244+
select {
245+
case req := <-w.reqCh:
246+
process(req)
247+
case <-closer.HasBeenClosed():
248+
close(w.reqCh)
249+
for req := range w.reqCh {
250+
process(req)
251+
}
252+
return
253+
}
254+
}
255+
}
256+
215257
// Add adds key and vs to sortedWriter.
216258
func (w *sortedWriter) Add(key []byte, vs y.ValueStruct) error {
217259
if bytes.Compare(key, w.lastKey) <= 0 {

0 commit comments

Comments (0)