-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
autobatch.go
266 lines (218 loc) · 6.49 KB
/
autobatch.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
package blockstore
import (
"context"
"sync"
"time"
"github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"
block "github.com/ipfs/go-libipfs/blocks"
"golang.org/x/xerrors"
)
// autolog is a logger for the autobatching blockstore. It is subscoped from the
// blockstore logger.
var autolog = log.Named("auto")
// contains the same set of blocks twice, once as an ordered list for flushing, and as a map for fast access
type blockBatch struct {
blockList []block.Block
blockMap map[cid.Cid]block.Block
}
type AutobatchBlockstore struct {
// TODO: drop if memory consumption is too high
addedCids map[cid.Cid]struct{}
stateLock sync.Mutex
bufferedBatch blockBatch
flushingBatch blockBatch
flushErr error
flushCh chan struct{}
doFlushLock sync.Mutex
flushRetryDelay time.Duration
doneCh chan struct{}
shutdown context.CancelFunc
backingBs Blockstore
bufferCapacity int
bufferSize int
}
func NewAutobatch(ctx context.Context, backingBs Blockstore, bufferCapacity int) *AutobatchBlockstore {
ctx, cancel := context.WithCancel(ctx)
bs := &AutobatchBlockstore{
addedCids: make(map[cid.Cid]struct{}),
backingBs: backingBs,
bufferCapacity: bufferCapacity,
flushCh: make(chan struct{}, 1),
doneCh: make(chan struct{}),
// could be made configable
flushRetryDelay: time.Millisecond * 100,
shutdown: cancel,
}
bs.bufferedBatch.blockMap = make(map[cid.Cid]block.Block)
go bs.flushWorker(ctx)
return bs
}
func (bs *AutobatchBlockstore) Put(ctx context.Context, blk block.Block) error {
bs.stateLock.Lock()
defer bs.stateLock.Unlock()
_, ok := bs.addedCids[blk.Cid()]
if !ok {
bs.addedCids[blk.Cid()] = struct{}{}
bs.bufferedBatch.blockList = append(bs.bufferedBatch.blockList, blk)
bs.bufferedBatch.blockMap[blk.Cid()] = blk
bs.bufferSize += len(blk.RawData())
if bs.bufferSize >= bs.bufferCapacity {
// signal that a flush is appropriate, may be ignored
select {
case bs.flushCh <- struct{}{}:
default:
// do nothing
}
}
}
return nil
}
func (bs *AutobatchBlockstore) flushWorker(ctx context.Context) {
defer close(bs.doneCh)
for {
select {
case <-bs.flushCh:
// TODO: check if we _should_ actually flush. We could get a spurious wakeup
// here.
putErr := bs.doFlush(ctx, false)
for putErr != nil {
select {
case <-ctx.Done():
return
case <-time.After(bs.flushRetryDelay):
autolog.Errorf("FLUSH ERRORED: %w, retrying after %v", putErr, bs.flushRetryDelay)
putErr = bs.doFlush(ctx, true)
}
}
case <-ctx.Done():
// Do one last flush.
_ = bs.doFlush(ctx, false)
return
}
}
}
// caller must NOT hold stateLock
// set retryOnly to true to only retry a failed flush and not flush anything new.
func (bs *AutobatchBlockstore) doFlush(ctx context.Context, retryOnly bool) error {
bs.doFlushLock.Lock()
defer bs.doFlushLock.Unlock()
// If we failed to flush last time, try flushing again.
if bs.flushErr != nil {
bs.flushErr = bs.backingBs.PutMany(ctx, bs.flushingBatch.blockList)
}
// If we failed, or we're _only_ retrying, bail.
if retryOnly || bs.flushErr != nil {
return bs.flushErr
}
// Then take the current batch...
bs.stateLock.Lock()
// We do NOT clear addedCids here, because its purpose is to expedite Puts
bs.flushingBatch = bs.bufferedBatch
bs.bufferedBatch.blockList = make([]block.Block, 0, len(bs.flushingBatch.blockList))
bs.bufferedBatch.blockMap = make(map[cid.Cid]block.Block, len(bs.flushingBatch.blockMap))
bs.stateLock.Unlock()
// And try to flush it.
bs.flushErr = bs.backingBs.PutMany(ctx, bs.flushingBatch.blockList)
// If we succeeded, reset the batch. Otherwise, we'll try again next time.
if bs.flushErr == nil {
bs.stateLock.Lock()
bs.flushingBatch = blockBatch{}
bs.stateLock.Unlock()
}
return bs.flushErr
}
// caller must NOT hold stateLock
func (bs *AutobatchBlockstore) Flush(ctx context.Context) error {
return bs.doFlush(ctx, false)
}
func (bs *AutobatchBlockstore) Shutdown(ctx context.Context) error {
// TODO: Prevent puts after we call this to avoid losing data.
bs.shutdown()
select {
case <-bs.doneCh:
case <-ctx.Done():
return ctx.Err()
}
bs.doFlushLock.Lock()
defer bs.doFlushLock.Unlock()
return bs.flushErr
}
func (bs *AutobatchBlockstore) Get(ctx context.Context, c cid.Cid) (block.Block, error) {
// may seem backward to check the backingBs first, but that is the likeliest case
blk, err := bs.backingBs.Get(ctx, c)
if err == nil {
return blk, nil
}
if !ipld.IsNotFound(err) {
return blk, err
}
bs.stateLock.Lock()
v, ok := bs.flushingBatch.blockMap[c]
if ok {
bs.stateLock.Unlock()
return v, nil
}
v, ok = bs.bufferedBatch.blockMap[c]
if ok {
bs.stateLock.Unlock()
return v, nil
}
bs.stateLock.Unlock()
// We have to check the backing store one more time because it may have been flushed by the
// time we were able to take the lock above.
return bs.backingBs.Get(ctx, c)
}
func (bs *AutobatchBlockstore) DeleteBlock(context.Context, cid.Cid) error {
// if we wanted to support this, we would have to:
// - flush
// - delete from the backingBs (if present)
// - remove from addedCids (if present)
// - if present in addedCids, also walk the ordered lists and remove if present
return xerrors.New("deletion is unsupported")
}
func (bs *AutobatchBlockstore) DeleteMany(ctx context.Context, cids []cid.Cid) error {
// see note in DeleteBlock()
return xerrors.New("deletion is unsupported")
}
func (bs *AutobatchBlockstore) Has(ctx context.Context, c cid.Cid) (bool, error) {
_, err := bs.Get(ctx, c)
if err == nil {
return true, nil
}
if ipld.IsNotFound(err) {
return false, nil
}
return false, err
}
func (bs *AutobatchBlockstore) GetSize(ctx context.Context, c cid.Cid) (int, error) {
blk, err := bs.Get(ctx, c)
if err != nil {
return 0, err
}
return len(blk.RawData()), nil
}
func (bs *AutobatchBlockstore) PutMany(ctx context.Context, blks []block.Block) error {
for _, blk := range blks {
if err := bs.Put(ctx, blk); err != nil {
return err
}
}
return nil
}
func (bs *AutobatchBlockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) {
if err := bs.Flush(ctx); err != nil {
return nil, err
}
return bs.backingBs.AllKeysChan(ctx)
}
func (bs *AutobatchBlockstore) HashOnRead(enabled bool) {
bs.backingBs.HashOnRead(enabled)
}
func (bs *AutobatchBlockstore) View(ctx context.Context, cid cid.Cid, callback func([]byte) error) error {
blk, err := bs.Get(ctx, cid)
if err != nil {
return err
}
return callback(blk.RawData())
}