-
Notifications
You must be signed in to change notification settings - Fork 29
/
couch-kvstore.hh
571 lines (501 loc) · 16.6 KB
/
couch-kvstore.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
#ifndef COUCH_KVSTORE_H
#define COUCH_KVSTORE_H 1
#include "libcouchstore/couch_db.h"
#include "kvstore.hh"
#include "item.hh"
#include "histo.hh"
#include "stats.hh"
#include "configuration.hh"
#include "couch-kvstore/couch-notifier.hh"
#include "couch-kvstore/couch-fs-stats.hh"
#define COUCHSTORE_NO_OPTIONS 0
/**
* Stats and timings for couchKVStore
*/
class CouchKVStoreStats {
public:
/**
* Default constructor
*/
CouchKVStoreStats() :
docsCommitted(0), numOpen(0), numClose(0),
numLoadedVb(0), numGetFailure(0), numSetFailure(0),
numDelFailure(0), numOpenFailure(0), numVbSetFailure(0),
readSizeHisto(ExponentialGenerator<size_t>(1, 2), 25),
writeSizeHisto(ExponentialGenerator<size_t>(1, 2), 25) {
}
void reset() {
docsCommitted.set(0);
numOpen.set(0);
numClose.set(0);
numLoadedVb.set(0);
numGetFailure.set(0);
numSetFailure.set(0);
numDelFailure.set(0);
numOpenFailure.set(0);
numVbSetFailure.set(0);
numCommitRetry.set(0);
readTimeHisto.reset();
readSizeHisto.reset();
writeTimeHisto.reset();
writeSizeHisto.reset();
delTimeHisto.reset();
commitHisto.reset();
commitRetryHisto.reset();
saveDocsHisto.reset();
batchSize.reset();
fsStats.reset();
}
// the number of docs committed
Atomic<size_t> docsCommitted;
// the number of open() calls
Atomic<size_t> numOpen;
// the number of close() calls
Atomic<size_t> numClose;
// the number of vbuckets loaded
Atomic<size_t> numLoadedVb;
//stats tracking failures
Atomic<size_t> numGetFailure;
Atomic<size_t> numSetFailure;
Atomic<size_t> numDelFailure;
Atomic<size_t> numOpenFailure;
Atomic<size_t> numVbSetFailure;
Atomic<size_t> numCommitRetry;
/* for flush and vb delete, no error handling in CouchKVStore, such
* failure should be tracked in MC-engine */
// How long it takes us to complete a read
Histogram<hrtime_t> readTimeHisto;
// How big are our reads?
Histogram<size_t> readSizeHisto;
// How long it takes us to complete a write
Histogram<hrtime_t> writeTimeHisto;
// How big are our writes?
Histogram<size_t> writeSizeHisto;
// Time spent in delete() calls.
Histogram<hrtime_t> delTimeHisto;
// Time spent in couchstore commit
Histogram<hrtime_t> commitHisto;
// Time spent in couchstore commit retry
Histogram<hrtime_t> commitRetryHisto;
// Time spent in couchstore save documents
Histogram<hrtime_t> saveDocsHisto;
// Batch size of saveDocs calls
Histogram<size_t> batchSize;
// Stats from the underlying OS file operations done by couchstore.
CouchstoreStats fsStats;
};
// Forward declarations: this header only uses these two types by reference,
// so their full definitions are not needed here.
class EventuallyPersistentEngine;
class EPStats;
/**
* Persistence callback attached to a CouchRequest.
*
* A union of two callback pointers: one for SET (mutation_result) and one
* for DELETE (int). The union itself carries no tag saying which member is
* valid; presumably the owner decides based on whether the request is a
* deletion -- confirm against the CouchRequest implementation.
*/
typedef union {
Callback <mutation_result> *setCb;
Callback <int> *delCb;
} CouchRequestCallback;
/**
* Size in bytes of the per-document metadata buffer held by CouchRequest:
* room for two uint32_t values plus one uint64_t value.
* NOTE(review): the field meanings (e.g. cas / expiry / flags) are not
* visible in this header -- confirm against the code that fills the buffer.
*/
const size_t COUCHSTORE_METADATA_SIZE(2 * sizeof(uint32_t) + sizeof(uint64_t));
/**
 * Class representing a document to be persisted in couchstore.
 *
 * Bundles the couchstore Doc / DocInfo pair for one item together with the
 * vbucket it belongs to, the revision of the vbucket database file, the
 * persistence callback and a start timestamp used for timing.
 */
class CouchRequest
{
public:
    /**
     * Constructor
     *
     * @param it Item instance to be persisted
     * @param rev vbucket database revision number
     * @param cb persistence callback
     * @param del flag indicating if it is an item deletion or not
     */
    CouchRequest(const Item &it, uint64_t rev, CouchRequestCallback &cb, bool del);

    /**
     * Get the vbucket id of a document to be persisted
     *
     * @return vbucket id of a document
     */
    uint16_t getVBucketId(void) const {
        return vbucketId;
    }

    /**
     * Get the revision number of the vbucket database file
     * where the document is persisted
     *
     * @return revision number of the corresponding vbucket database file
     */
    uint64_t getRevNum(void) const {
        return fileRevNum;
    }

    /**
     * Get the couchstore Doc instance of a document to be persisted
     *
     * Not const because the returned pointer gives mutable access to a
     * member of this object.
     *
     * @return pointer to the couchstore Doc instance of a document, or
     *         NULL if this request is a deletion
     */
    Doc *getDbDoc(void) {
        if (deleteItem) {
            return NULL;
        }
        return &dbDoc;
    }

    /**
     * Get the couchstore DocInfo instance of a document to be persisted
     *
     * Not const because the returned pointer gives mutable access to a
     * member of this object.
     *
     * @return pointer to the couchstore DocInfo instance of a document
     */
    DocInfo *getDbDocInfo(void) {
        return &dbDocInfo;
    }

    /**
     * Get the callback instance for SET
     *
     * Reads the setCb member of the callback union; no check is made that
     * this request actually is a SET.
     *
     * @return callback instance for SET
     */
    Callback<mutation_result> *getSetCallback(void) const {
        return callback.setCb;
    }

    /**
     * Get the callback instance for DELETE
     *
     * Reads the delCb member of the callback union; no check is made that
     * this request actually is a DELETE.
     *
     * @return callback instance for DELETE
     */
    Callback<int> *getDelCallback(void) const {
        return callback.delCb;
    }

    /**
     * Get the sequence number of a document to be persisted
     *
     * @return sequence number of a document
     */
    int64_t getItemId(void) const {
        return itemId;
    }

    /**
     * Get the time elapsed since the start timestamp of this instance
     * (assumed to be taken when the instance is created -- the constructor
     * is defined outside this header).
     *
     * The raw gethrtime() difference is divided by 1000, so the result is
     * in microseconds provided gethrtime() reports nanoseconds.
     *
     * @return elapsed time, in 1/1000ths of the gethrtime() unit
     */
    hrtime_t getDelta() const {
        return (gethrtime() - start) / 1000;
    }

    /**
     * Get the length of a document body to be persisted
     *
     * @return length of a document body
     */
    size_t getNBytes() const {
        return valuelen;
    }

    /**
     * Return true if the document to be persisted is for DELETE
     *
     * @return true if the document to be persisted is for DELETE
     */
    bool isDelete() const {
        return deleteItem;
    }

    /**
     * Get the key of a document to be persisted
     *
     * @return key of a document to be persisted
     */
    const std::string& getKey(void) const {
        return key;
    }

private:
    // Item value handle (presumably held so the body stays alive while
    // dbDoc refers to it -- confirm against the constructor)
    value_t value;
    // length of the document body, reported by getNBytes()
    size_t valuelen;
    // raw metadata buffer of COUCHSTORE_METADATA_SIZE bytes
    uint8_t meta[COUCHSTORE_METADATA_SIZE];
    // vbucket the document belongs to
    uint16_t vbucketId;
    // revision number of the vbucket database file
    uint64_t fileRevNum;
    // document key
    std::string key;
    // couchstore document and its info record
    Doc dbDoc;
    DocInfo dbDocInfo;
    // sequence number of the document
    int64_t itemId;
    // true when this request deletes the document
    bool deleteItem;
    // SET or DELETE persistence callback
    CouchRequestCallback callback;
    // timestamp that getDelta() measures from
    hrtime_t start;
};
/**
* KVStore with couchstore as the underlying storage system
*
* Only the inline members are defined in this header; everything else is
* declared here and defined elsewhere.
*/
class CouchKVStore : public KVStore
{
public:
/**
* Constructor
*
* @param theEngine EventuallyPersistentEngine instance
* @param read_only flag indicating if this kvstore instance is for read-only operations
*/
CouchKVStore(EventuallyPersistentEngine &theEngine,
bool read_only = false);
/**
* Copy constructor
*
* @param from the source kvstore instance
*/
CouchKVStore(const CouchKVStore &from);
/**
* Destructor
*
* Calls close() before the instance goes away.
*/
virtual ~CouchKVStore() {
close();
}
/**
* Reset database to a clean state.
*/
void reset(void);
/**
* Begin a transaction.
*
* Asserts that this instance is not read-only, then sets the
* in-transaction flag unconditionally (also when a transaction is
* already in progress).
*
* @return the in-transaction flag, which is always true at this point
*/
bool begin(void) {
assert(!isReadOnly());
intransaction = true;
return intransaction;
}
/**
* Commit a transaction (unless not currently in one).
*
* @return true if the commit is completed successfully.
*/
bool commit(void);
/**
* Rollback a transaction (unless not currently in one).
*
* Asserts that this instance is not read-only. The only state touched
* here is the in-transaction flag, which is cleared.
*/
void rollback(void) {
assert(!isReadOnly());
if (intransaction) {
intransaction = false;
}
}
/**
* Query the properties of the underlying storage.
*
* @return properties of the underlying storage system
*/
StorageProperties getStorageProperties(void);
/**
* Insert or update a given document.
*
* @param itm instance representing the document to be inserted or updated
* @param cb callback instance for SET
*/
void set(const Item &itm, Callback<mutation_result> &cb);
/**
* Retrieve the document with a given key from the underlying storage system.
*
* @param key the key of a document to be retrieved
* @param rowid the sequence number of a document
* @param vb vbucket id of a document
* @param cb callback instance for GET
*/
void get(const std::string &key, uint64_t rowid,
uint16_t vb, Callback<GetValue> &cb);
/**
* Retrieve the multiple documents from the underlying storage system at once.
*
* @param vb vbucket id of a document
* @param itms list of items whose documents are going to be retrieved
*/
void getMulti(uint16_t vb, vb_bgfetch_queue_t &itms);
/**
* Delete a given document from the underlying storage system.
*
* @param itm instance representing the document to be deleted
* @param rowid the sequence number of a document
* @param cb callback instance for DELETE
*/
void del(const Item &itm, uint64_t rowid,
Callback<int> &cb);
/**
* Delete a given vbucket database instance from the underlying storage system
*
* @param vbucket vbucket id
* @param recreate true if we need to create an empty vbucket after deletion
* @return true if the vbucket deletion is completed successfully.
*/
bool delVBucket(uint16_t vbucket, bool recreate);
/**
* Retrieve the list of persisted vbucket states
*
* @return vbucket state map instance where key is vbucket id and
* value is vbucket state
*/
vbucket_map_t listPersistedVbuckets(void);
/**
* Retrieve the list of persisted engine stats
*
* @param stats map instance where the persisted engine stats will be added
*/
void getPersistedStats(std::map<std::string, std::string> &stats);
/**
* Persist a snapshot of the engine stats in the underlying storage.
*
* @param engine_stats map instance that contains all the engine stats
* @return true if the snapshot is done successfully
*/
bool snapshotStats(const std::map<std::string, std::string> &engine_stats);
/**
* Persist a snapshot of the vbucket states in the underlying storage system.
*
* @param vb_states map instance that contains all the vbucket states
* @return true if the snapshot is done successfully
*/
bool snapshotVBuckets(const vbucket_map_t &vb_states);
/**
* Retrieve all the documents from the underlying storage system.
*
* @param cb callback instance to process each document retrieved
*/
void dump(shared_ptr<Callback<GetValue> > cb);
/**
* Retrieve all the documents for a given vbucket from the storage system.
*
* @param vb vbucket id
* @param cb callback instance to process each document retrieved
*/
void dump(uint16_t vb, shared_ptr<Callback<GetValue> > cb);
/**
* Retrieve all the keys from the underlying storage system.
*
* @param vbids list of vbucket ids whose document keys are going to be retrieved
* @param cb callback instance to process each key retrieved
*/
void dumpKeys(const std::vector<uint16_t> &vbids, shared_ptr<Callback<GetValue> > cb);
/**
* Retrieve the list of keys and their meta data for a given
* vbucket, which were deleted.
* @param vb vbucket id
* @param cb callback instance to process each key and its meta data
*/
void dumpDeleted(uint16_t vb, shared_ptr<Callback<GetValue> > cb);
/**
* Does the underlying storage system support key-only retrieval operations?
*
* @return true if key-only retrieval is supported (always true here)
*/
bool isKeyDumpSupported() {
return true;
}
/**
* Warmup function
*
* @param lf access log instance that contains the list of resident
* items before shutdown
* @param vbmap map instance where key is vbucket id and value is its state
* @param cb callback instance for warmup
* @param estimate callback instance for estimating warmup completion
*/
size_t warmup(MutationLog &lf,
const std::map<uint16_t, vbucket_state> &vbmap,
Callback<GetValue> &cb,
Callback<size_t> &estimate);
/**
* Get the estimated number of items that are going to be loaded during warmup.
*
* @param items number of estimated items to be loaded during warmup
* @return true if the estimation is completed successfully
*/
bool getEstimatedItemCount(size_t &items);
/**
* Perform the pre-optimizations before persisting dirty items
*
* @param items list of dirty items that can be pre-optimized
*/
void optimizeWrites(std::vector<queued_item> &items);
/**
* Add all the kvstore stats to the stat response
*
* @param prefix stat name prefix
* @param add_stat upstream function that allows us to add a stat to the response
* @param cookie upstream connection cookie
*/
void addStats(const std::string &prefix, ADD_STAT add_stat, const void *cookie);
/**
* Add all the kvstore timings stats to the stat response
*
* @param prefix stat name prefix
* @param add_stat upstream function that allows us to add a stat to the response
* @param c upstream connection cookie
*/
void addTimingStats(const std::string &prefix, ADD_STAT add_stat,
const void *c);
/**
* Resets couchstore stats
*/
void resetStats() {
st.reset();
}
/**
* No-op in this implementation; the argument is ignored.
*
* @param txn_size unused
*/
void processTxnSizeChange(size_t txn_size) {
(void) txn_size;
}
/**
* No-op in this implementation; the argument is ignored.
*
* @param batch_count unused
*/
void setVBBatchCount(size_t batch_count) {
(void) batch_count;
}
/**
* No-op in this implementation; the argument is ignored.
*
* @param destroyOnlyOne unused
*/
void destroyInvalidVBuckets(bool destroyOnlyOne = false) {
(void) destroyOnlyOne;
}
// Static helpers taking a Db handle, a DocInfo and an opaque context.
// NOTE(review): the signatures look like couchstore per-document
// callbacks -- confirm against their definitions and call sites.
static int recordDbDump(Db *db, DocInfo *docinfo, void *ctx);
static int recordDbStat(Db *db, DocInfo *docinfo, void *ctx);
static int getMultiCb(Db *db, DocInfo *docinfo, void *ctx);
// Reads the state of vbucket vbId from db into vbState (output parameter).
static void readVBState(Db *db, uint16_t vbId, vbucket_state &vbState);
// Fetches the document described by docinfo into docValue; metaOnly
// presumably skips the document body -- confirm in the definition.
couchstore_error_t fetchDoc(Db *db, DocInfo *docinfo,
GetValue &docValue, uint16_t vbId,
bool metaOnly);
// Translates a couchstore error code into an engine error code.
ENGINE_ERROR_CODE couchErr2EngineErr(couchstore_error_t errCode);
// Gives direct, mutable access to this instance's stats object.
CouchKVStoreStats &getCKVStoreStat(void) { return st; }
protected:
// Shared loader behind the dump*() entry points (presumably -- confirm
// in the definition). vbids is a pointer, so it may be optional.
void loadDB(shared_ptr<Callback<GetValue> > cb, bool keysOnly,
std::vector<uint16_t> *vbids,
couchstore_docinfos_options options=COUCHSTORE_NO_OPTIONS);
// Persists the state of one vbucket; newfile and notify default to
// false and true respectively.
bool setVBucketState(uint16_t vbucketId, vbucket_state &vbstate,
uint32_t vb_change_type, bool newfile = false,
bool notify = true);
// Convenience wrapper: setVBucketState() with VB_STATE_CHANGED,
// newfile = true and notify = true.
bool resetVBucket(uint16_t vbucketId, vbucket_state &vbstate) {
return setVBucketState(vbucketId, vbstate, VB_STATE_CHANGED, true, true);
}
// Adds a single named stat value to the stat response.
template <typename T>
void addStat(const std::string &prefix, const char *nm, T &val,
ADD_STAT add_stat, const void *c);
private:
// Assignment is private; presumably left undefined to forbid assigning
// one instance to another.
void operator=(const CouchKVStore &from);
void open();
void close();
bool commit2couchstore(void);
void queueItem(CouchRequest *req);
bool getDbFile(uint16_t vbucketId, std::string &dbFileName);
uint64_t checkNewRevNum(std::string &dbname, bool newFile = false);
void populateFileNameMap(std::vector<std::string> &filenames);
void getFileNameMap(std::vector<uint16_t> *vbids, std::string &dirname,
std::map<uint16_t, uint64_t> &filemap);
void updateDbFileMap(uint16_t vbucketId, uint64_t newFileRev,
bool insertImmediately = false);
void remVBucketFromDbFileMap(uint16_t vbucketId);
couchstore_error_t openDB(uint16_t vbucketId, uint64_t fileRev, Db **db,
uint64_t options, uint64_t *newFileRev = NULL);
couchstore_error_t openDB_retry(std::string &dbfile, uint64_t options,
const couch_file_ops *ops,
Db **db, uint64_t *newFileRev);
couchstore_error_t saveDocs(uint16_t vbid, uint64_t rev, Doc **docs,
DocInfo **docinfos, int docCount);
void commitCallback(CouchRequest **committedReqs, int numReqs,
couchstore_error_t errCode);
couchstore_error_t saveVBState(Db *db, vbucket_state &vbState);
void setDocsCommitted(uint16_t docs);
void closeDatabaseHandle(Db *db);
// References to the owning engine, its stats and its configuration.
EventuallyPersistentEngine &engine;
EPStats &epStats;
Configuration &configuration;
const std::string dbname;
CouchNotifier *couchNotifier;
// uint16_t -> uint64_t map; presumably vbucket id -> database file
// revision (cf. updateDbFileMap) -- confirm in the implementation.
std::map<uint16_t, uint64_t>dbFileMap;
// Requests queued for persistence (cf. queueItem).
std::vector<CouchRequest *> pendingReqsQ;
size_t pendingCommitCnt;
// Set by begin(), cleared by rollback().
bool intransaction;
/* all stats */
CouchKVStoreStats st;
couch_file_ops statCollectingFileOps;
/* vbucket state cache*/
vbucket_map_t cachedVBStates;
};
#endif /* COUCH_KVSTORE_H */