Skip to content

Commit

Permalink
MB-6945 Backport file format refactor from indexer
Browse files Browse the repository at this point in the history
Pulling in changes from commit c7b1432 on the indexer branch.
Originally reviewed at http://review.couchbase.org/19815

These changes consolidate the file format definition and
make code dealing with the file format cleaner, and also
make it much easier to change the file format.

Change-Id: I4ee37e66f451fb77fcc3f6264a262c9bbb5417e6
Reviewed-on: http://review.couchbase.org/22103
Reviewed-by: Jens Alfke <jens@couchbase.com>
Reviewed-by: Damien Katz <damien@couchbase.com>
Tested-by: Damien Katz <damien@couchbase.com>
  • Loading branch information
apage43 authored and steveyen committed Nov 2, 2012
1 parent d0e70d0 commit 1c3763e
Show file tree
Hide file tree
Showing 17 changed files with 597 additions and 314 deletions.
8 changes: 5 additions & 3 deletions Makefile.am
Expand Up @@ -62,14 +62,16 @@ libcouchstore_la_SOURCES = \
src/llmsort.c \
src/mergesort.c \
src/mergesort.h \
src/node_types.c \
src/node_types.h \
src/reduces.c \
src/reduces.h \
src/strerror.c \
src/util.c \
src/util.h


libcouchstore_la_LDFLAGS = $(AM_LDFLAGS) -version-info $(LIBCOUCHSTORE_API_CURRENT):$(LIBCOUCHSTORE_API_REVISION):$(LIBCOUCHSTORE_API_AGE) -no-undefined -lsnappy -lpthread
libcouchstore_la_LDFLAGS = $(AM_LDFLAGS) $(ICU_LOCAL_LDFLAGS) -version-info $(LIBCOUCHSTORE_API_CURRENT):$(LIBCOUCHSTORE_API_REVISION):$(LIBCOUCHSTORE_API_AGE) -no-undefined -lsnappy -lpthread

if WINDOWS
libcouchstore_la_SOURCES += src/os_win.c
Expand All @@ -78,8 +80,8 @@ else
libcouchstore_la_SOURCES += src/os.c
endif

libcouchstore_la_CFLAGS = $(AM_CFLAGS) -DLIBCOUCHSTORE_INTERNAL=1
libcouchstore_la_LIBADD = librfc1321.la libbyteswap.la
libcouchstore_la_CFLAGS = $(AM_CFLAGS) $(ICU_LOCAL_CFLAGS) -DLIBCOUCHSTORE_INTERNAL=1 -Wstrict-aliasing=2
libcouchstore_la_LIBADD = librfc1321.la libbyteswap.la $(ICU_LOCAL_LIBS)

couch_dbdump_SOURCES = src/dbdump.c
couch_dbdump_DEPENDENCIES = libcouchstore.la
Expand Down
141 changes: 100 additions & 41 deletions src/bitfield.h
Expand Up @@ -2,64 +2,123 @@
#define COUCH_BITFIELD_H

#include "config.h"
#include "internal.h"
#include <assert.h>
#include <string.h>

/** Read a 48-bit (6-byte) big-endian integer from the address pointed to by buf. */
static inline uint64_t get_48(const char *buf)

// Variable-width raw integer types.
//
// Each raw_NN type wraps exactly NN/8 bytes of encoded data. Because the only member
// is an array of uint8_t, the types need only byte alignment, so structs consisting
// solely of raw_NN members are expected to be laid out without padding.
//
// The C standard does not guarantee the absence of trailing padding, so the sizes
// this code depends on are verified at compile time at the end of this section.

typedef struct {
    uint8_t raw_bytes[1];
} raw_08;

typedef struct {
    uint8_t raw_bytes[2];
} raw_16;

typedef struct {
    uint8_t raw_bytes[3];
} raw_24;

typedef struct {
    uint8_t raw_bytes[4];
} raw_32;

typedef struct {
    uint8_t raw_bytes[5];
} raw_40;

typedef struct {
    uint8_t raw_bytes[6];
} raw_48;

typedef struct {
    uint8_t raw_bytes[8];
} raw_64;

// Compile-time size check that works without C11 _Static_assert: if RAW_TYPE is not
// exactly NBYTES bytes, the typedef declares an array of negative size and the
// build fails instead of silently using a wrong layout.
#define COUCH_RAW_SIZE_CHECK(RAW_TYPE, NBYTES) \
    typedef char RAW_TYPE##_size_check[(sizeof(RAW_TYPE) == (NBYTES)) ? 1 : -1]

COUCH_RAW_SIZE_CHECK(raw_08, 1);
COUCH_RAW_SIZE_CHECK(raw_16, 2);
COUCH_RAW_SIZE_CHECK(raw_24, 3);
COUCH_RAW_SIZE_CHECK(raw_32, 4);
COUCH_RAW_SIZE_CHECK(raw_40, 5);
COUCH_RAW_SIZE_CHECK(raw_48, 6);
COUCH_RAW_SIZE_CHECK(raw_64, 8);

#undef COUCH_RAW_SIZE_CHECK


// Functions for decoding raw_xx types to native integers:

// Expands to the complete body of a decode_rawNN function.
//
// DST_TYPE: native integer type to produce (declared as the local `value`, zeroed).
// FLIP_FN:  conversion applied to `value` before returning it (the callers in this
//           file pass ntohs / ntohl / ntohll).
//
// The macro is not hygienic: it reads a variable named `raw` from the enclosing
// function, and it ends with a `return`, so it must be the last statement of a
// function whose parameter is called `raw`.
//
// The bytes of `raw` are copied into the LAST sizeof(raw) bytes of `value`, leaving
// any leading bytes zero, so a raw type narrower than DST_TYPE is right-aligned
// before FLIP_FN is applied.
#define DECODE_RAW(DST_TYPE, FLIP_FN) \
DST_TYPE value = 0; \
memcpy((char*)&value + sizeof(value) - sizeof(raw), &(raw), sizeof(raw)); \
return FLIP_FN(value)

// Decode a 1-byte raw value. A single byte has no byte order, so it is returned as is.
static inline uint8_t decode_raw08(raw_08 raw)
{
    const uint8_t only_byte = raw.raw_bytes[0];
    return only_byte;
}

// Decode a raw_16 into a native uint16_t.
// The raw bytes are copied into the tail of a zeroed 16-bit integer, which is then
// passed through ntohs to convert it to host byte order.
static inline uint16_t decode_raw16(raw_16 raw)
{
    uint16_t wire = 0;
    char *tail = (char *)&wire + (sizeof(wire) - sizeof(raw));

    memcpy(tail, &raw, sizeof(raw));
    return ntohs(wire);
}

// Decode a raw_24 into a native uint32_t.
// The three raw bytes are copied into the tail of a zeroed 32-bit integer (the
// leading byte stays zero), which is then passed through ntohl to convert it to
// host byte order.
static inline uint32_t decode_raw24(raw_24 raw)
{
    uint32_t wire = 0;
    char *tail = (char *)&wire + (sizeof(wire) - sizeof(raw));

    memcpy(tail, &raw, sizeof(raw));
    return ntohl(wire);
}

// Decode a raw_32 into a native uint32_t.
// The raw bytes are copied into the tail of a zeroed 32-bit integer, which is then
// passed through ntohl to convert it to host byte order.
static inline uint32_t decode_raw32(raw_32 raw)
{
    uint32_t wire = 0;
    char *tail = (char *)&wire + (sizeof(wire) - sizeof(raw));

    memcpy(tail, &raw, sizeof(raw));
    return ntohl(wire);
}

static inline uint64_t decode_raw40(raw_40 raw)
{
const uint32_t* longs = (const uint32_t*)buf;
const uint16_t* shorts = (const uint16_t*)buf;
return ((uint64_t)ntohl(longs[0]) << 16) | ntohs(shorts[2]);
DECODE_RAW(uint64_t, ntohll);
}

/** Read a 40-bit (5-byte) big-endian integer from the address pointed to by buf. */
static inline uint64_t get_40(const char *buf)
static inline uint64_t decode_raw48(raw_48 raw)
{
const uint32_t* longs = (const uint32_t*)buf;
return ((uint64_t)ntohl(longs[0]) << 8) | (uint8_t)buf[4];
DECODE_RAW(uint64_t, ntohll);
}

/** Read a 32-bit big-endian integer from the address pointed to by buf. */
static inline uint32_t get_32(const char *buf)
static inline uint64_t decode_raw64(raw_64 raw)
{
return ntohl(*(const uint32_t*)buf);
DECODE_RAW(uint64_t, ntohll);
}

/** Read a 16-bit big-endian integer from the address pointed to by buf. */
static inline uint32_t get_16(const char *buf)

// Functions for encoding native integers to raw_xx types:

// Expands to the complete body of an encode_rawNN function.
//
// FLIP_FN:  conversion applied to `value` first (the callers in this file pass
//           htons / htonl / htonll, or nothing at all for the 1-byte case).
// RAW_TYPE: raw_NN type to produce (declared as the local `raw`).
//
// The macro is not hygienic: it reads and overwrites a variable named `value` from
// the enclosing function, and it ends with a `return`, so it must be the last
// statement of a function whose parameter is called `value`.
//
// Only the LAST sizeof(raw) bytes of the converted `value` are copied into `raw`;
// when RAW_TYPE is narrower than `value`, the leading bytes are dropped.
#define ENCODE_RAW(FLIP_FN, RAW_TYPE) \
value = FLIP_FN(value); \
RAW_TYPE raw; \
memcpy(&raw, (char*)&value + sizeof(value) - sizeof(raw), sizeof(raw)); \
return raw


static inline raw_08 encode_raw08(uint8_t value)
{
return ntohs(*(const uint16_t*)buf);
ENCODE_RAW(, raw_08);
}

/** Read a 12-bit key length and 28-bit value length, packed into 5 bytes big-endian. */
static inline void get_kvlen(const char *buf, uint32_t *klen, uint32_t *vlen)
static inline raw_16 encode_raw16(uint16_t value)
{
//12, 28 bit
*klen = get_16(buf) >> 4;
*vlen = get_32(buf + 1) & 0x0FFFFFFF;
ENCODE_RAW(htons, raw_16);
}

/** Flip on the bits of num as a numbits-bit number, offset bitpos bits into
buf. MUST ZERO MEMORY _BEFORE_ WRITING TO IT! */
static inline void set_bits(char *buf, const int bitpos, const int numbits, uint64_t num)
static inline raw_32 encode_raw32(uint32_t value)
{
// This may look inefficient, but since set_bits is generally called with constant values for
// bitpos and numbits, the 'if' tests will be resolved by the optimizer and only the
// appropriate block will be compiled.
if (bitpos + numbits <= 16) {
uint16_t num16 = (uint16_t) num;
num16 <<= (16 - (numbits + bitpos));
num16 = htons(num16);
*(uint16_t*)buf |= num16;
} else if (bitpos + numbits <= 32) {
uint32_t num32 = (uint32_t) num;
num32 <<= (32 - (numbits + bitpos));
num32 = htonl(num32);
*(uint32_t*)buf |= num32;
} else {
num = num << (64 - (numbits + bitpos));
num = htonll(num);
*(uint64_t*)buf |= num;
}
ENCODE_RAW(htonl, raw_32);
}

// Encode a native integer as a raw_40.
// The value is converted with htonll (defined outside this header; presumably a
// 64-bit host-to-network swap), and the trailing sizeof(raw_40) bytes of the
// converted integer become the raw bytes. The leading bytes are discarded.
static inline raw_40 encode_raw40(uint64_t value)
{
    raw_40 encoded;
    uint64_t wire = htonll(value);
    const char *tail = (const char *)&wire + (sizeof(wire) - sizeof(encoded));

    memcpy(&encoded, tail, sizeof(encoded));
    return encoded;
}

// Encode a native integer as a raw_48.
// The value is converted with htonll (defined outside this header; presumably a
// 64-bit host-to-network swap), and the trailing sizeof(raw_48) bytes of the
// converted integer become the raw bytes. The leading bytes are discarded.
static inline raw_48 encode_raw48(uint64_t value)
{
    raw_48 encoded;
    uint64_t wire = htonll(value);
    const char *tail = (const char *)&wire + (sizeof(wire) - sizeof(encoded));

    memcpy(&encoded, tail, sizeof(encoded));
    return encoded;
}

// Encode a native integer as a raw_64.
// The value is converted with htonll (defined outside this header; presumably a
// 64-bit host-to-network swap), and all bytes of the converted integer become the
// raw bytes.
static inline raw_64 encode_raw64(uint64_t value)
{
    raw_64 encoded;
    uint64_t wire = htonll(value);
    const char *tail = (const char *)&wire + (sizeof(wire) - sizeof(encoded));

    memcpy(&encoded, tail, sizeof(encoded));
    return encoded;
}

#endif
61 changes: 24 additions & 37 deletions src/btree_modify.c
Expand Up @@ -7,7 +7,7 @@
#include "couch_btree.h"
#include "util.h"
#include "arena.h"
#include "bitfield.h"
#include "node_types.h"

#define CHUNK_THRESHOLD 1279
#define CHUNK_SIZE (CHUNK_THRESHOLD * 2 / 3)
Expand Down Expand Up @@ -119,15 +119,15 @@ couchstore_error_t mr_push_item(sized_buf *k, sized_buf *v, couchfile_modify_res

static nodelist *encode_pointer(arena* a, node_pointer *ptr)
{
nodelist *pel = make_nodelist(a, 14 + ptr->reduce_value.size);
nodelist *pel = make_nodelist(a, sizeof(raw_node_pointer) + ptr->reduce_value.size);
if (!pel) {
return NULL;
}
memset(pel->data.buf, 0, 14);
set_bits(pel->data.buf + 0, 0, 48, ptr->pointer);
set_bits(pel->data.buf + 6, 0, 48, ptr->subtreesize);
set_bits(pel->data.buf + 12, 0, 16, ptr->reduce_value.size);
memcpy(pel->data.buf + 14, ptr->reduce_value.buf, ptr->reduce_value.size);
raw_node_pointer *raw = (raw_node_pointer*)pel->data.buf;
raw->pointer = encode_raw48(ptr->pointer);
raw->subtreesize = encode_raw48(ptr->subtreesize);
raw->reduce_value_size = encode_raw16((uint16_t)ptr->reduce_value.size);
memcpy(raw + 1, ptr->reduce_value.buf, ptr->reduce_value.size);
pel->pointer = ptr;
pel->key = ptr->key;
return pel;
Expand All @@ -154,10 +154,11 @@ static node_pointer *read_pointer(arena* a, sized_buf *key, char *buf)
if (!p) {
return NULL;
}
p->pointer = get_48(buf);
p->subtreesize = get_48(buf + 6);
p->reduce_value.size = get_16(buf + 12);
p->reduce_value.buf = buf + 14;
const raw_node_pointer *raw = (const raw_node_pointer*)buf;
p->pointer = decode_raw48(raw->pointer);
p->subtreesize = decode_raw48(raw->subtreesize);
p->reduce_value.size = decode_raw16(raw->reduce_value_size);
p->reduce_value.buf = buf + sizeof(*raw);
p->key = *key;
return p;
}
Expand All @@ -173,7 +174,7 @@ static couchstore_error_t flush_mr(couchfile_modify_result *res)
//with uncompressed size of at least mr_quota
static couchstore_error_t flush_mr_partial(couchfile_modify_result *res, size_t mr_quota)
{
size_t bufpos = 0;
char *dst;
int errcode = COUCHSTORE_SUCCESS;
int itmcount = 0;
char *nodebuf = NULL;
Expand All @@ -198,19 +199,14 @@ static couchstore_error_t flush_mr_partial(couchfile_modify_result *res, size_t

writebuf.buf = nodebuf;

nodebuf[0] = (char) res->node_type;
bufpos = 1;
dst = nodebuf;
*(dst++) = (char) res->node_type;

nodelist *i = res->values->next;
//We don't care that we've reached mr_quota if we haven't written out
//at least two items and we're not writing a leaf node.
while (i != NULL && (mr_quota > 0 || (itmcount < 2 && res->node_type == KP_NODE))) {
memset(nodebuf + bufpos, 0, 5);
set_bits(nodebuf + bufpos , 0, 12, i->key.size);
set_bits(nodebuf + bufpos + 1, 4, 28, i->data.size);
memcpy(nodebuf + bufpos + 5, i->key.buf, i->key.size);
memcpy(nodebuf + bufpos + 5 + i->key.size, i->data.buf, i->data.size);
bufpos = bufpos + 5 + i->data.size + i->key.size;
dst = write_kv(dst, i->key, i->data);
if (i->pointer) {
subtreesize += i->pointer->subtreesize;
}
Expand All @@ -221,7 +217,7 @@ static couchstore_error_t flush_mr_partial(couchfile_modify_result *res, size_t
itmcount++;
}

writebuf.size = bufpos;
writebuf.size = dst - nodebuf;

errcode = db_write_buf_compressed(res->rq->db, &writebuf, &diskpos, &disk_size);
free(nodebuf); // here endeth the nodebuf.
Expand Down Expand Up @@ -262,7 +258,7 @@ static couchstore_error_t flush_mr_partial(couchfile_modify_result *res, size_t
res->pointers_end->next = pel;
res->pointers_end = pel;

res->node_len -= (bufpos - 1);
res->node_len -= (writebuf.size - 1);

res->values->next = i;
if(i == NULL) {
Expand Down Expand Up @@ -329,11 +325,8 @@ static couchstore_error_t modify_node(couchfile_modify_request *rq,
if (nptr == NULL || nodebuf[0] == 1) { //KV Node
local_result->node_type = KV_NODE;
while (bufpos < nodebuflen) {
uint32_t klen, vlen;
get_kvlen(nodebuf + bufpos, &klen, &vlen);
sized_buf cmp_key = {nodebuf + bufpos + 5, klen};
sized_buf val_buf = {nodebuf + bufpos + 5 + klen, vlen};
bufpos += 5 + klen + vlen;
sized_buf cmp_key, val_buf;
bufpos += read_kv(nodebuf + bufpos, &cmp_key, &val_buf);
int advance = 0;
while (!advance && start < end) {
advance = 1;
Expand Down Expand Up @@ -414,11 +407,8 @@ static couchstore_error_t modify_node(couchfile_modify_request *rq,
} else if (nodebuf[0] == 0) { //KP Node
local_result->node_type = KP_NODE;
while (bufpos < nodebuflen && start < end) {
uint32_t klen, vlen;
get_kvlen(nodebuf + bufpos, &klen, &vlen);
sized_buf cmp_key = {nodebuf + bufpos + 5, klen};
sized_buf val_buf = {nodebuf + bufpos + 5 + klen, vlen};
bufpos += 5 + klen + vlen;
sized_buf cmp_key, val_buf;
bufpos += read_kv(nodebuf + bufpos, &cmp_key, &val_buf);
int cmp_val = rq->cmp.compare(&cmp_key, rq->actions[start].key);
if (bufpos == nodebuflen) {
//We're at the last item in the kpnode, must apply all our
Expand Down Expand Up @@ -473,11 +463,8 @@ static couchstore_error_t modify_node(couchfile_modify_request *rq,
}
}
while (bufpos < nodebuflen) {
uint32_t klen, vlen;
get_kvlen(nodebuf + bufpos, &klen, &vlen);
sized_buf cmp_key = {nodebuf + bufpos + 5, klen};
sized_buf val_buf = {nodebuf + bufpos + 5 + klen, vlen};
bufpos += 5 + klen + vlen;
sized_buf cmp_key, val_buf;
bufpos += read_kv(nodebuf + bufpos, &cmp_key, &val_buf);
node_pointer *add = read_pointer(dst->arena, &cmp_key, val_buf.buf);
if (!add) {
errcode = COUCHSTORE_ERROR_ALLOC_FAIL;
Expand Down
30 changes: 7 additions & 23 deletions src/btree_read.c
Expand Up @@ -3,7 +3,7 @@
#include <stdlib.h>
#include "couch_btree.h"
#include "util.h"
#include "bitfield.h"
#include "node_types.h"

static couchstore_error_t btree_lookup_inner(couchfile_lookup_request *rq,
uint64_t diskpos,
Expand All @@ -24,11 +24,8 @@ static couchstore_error_t btree_lookup_inner(couchfile_lookup_request *rq,

if (nodebuf[0] == 0) { //KP Node
while (bufpos < nodebuflen && current < end) {
uint32_t klen, vlen;
get_kvlen(nodebuf + bufpos, &klen, &vlen);
sized_buf cmp_key = {nodebuf + bufpos + 5, klen};
sized_buf val_buf = {nodebuf + bufpos + 5 + klen, vlen};
bufpos += 5 + klen + vlen;
sized_buf cmp_key, val_buf;
bufpos += read_kv(nodebuf + bufpos, &cmp_key, &val_buf);

if (rq->cmp.compare(&cmp_key, rq->keys[current]) >= 0) {
if (rq->fold) {
Expand All @@ -43,31 +40,18 @@ static couchstore_error_t btree_lookup_inner(couchfile_lookup_request *rq,
last_item++;
} while (last_item < end && rq->cmp.compare(&cmp_key, rq->keys[last_item]) >= 0);

if (rq->node_callback) {
// Invoke the node_callback on entry to the child node (and later on exit)
uint64_t subtreeSize = get_48(val_buf.buf + 6);
sized_buf reduce_value = {val_buf.buf + 14, get_16(val_buf.buf + 12)};
error_pass(rq->node_callback(rq, subtreeSize, &reduce_value));
}

pointer = get_48(val_buf.buf);
const raw_node_pointer *raw = (const raw_node_pointer*)val_buf.buf;
pointer = decode_raw48(raw->pointer);
error_pass(btree_lookup_inner(rq, pointer, current, last_item));
if (!rq->in_fold) {
current = last_item;
}

if (rq->node_callback) {
error_pass(rq->node_callback(rq, 0, NULL));
}
}
}
} else if (nodebuf[0] == 1) { //KV Node
while (bufpos < nodebuflen && current < end) {
uint32_t klen, vlen;
get_kvlen(nodebuf + bufpos, &klen, &vlen);
sized_buf cmp_key = {nodebuf + bufpos + 5, klen};
sized_buf val_buf = {nodebuf + bufpos + 5 + klen, vlen};
bufpos += 5 + klen + vlen;
sized_buf cmp_key, val_buf;
bufpos += read_kv(nodebuf + bufpos, &cmp_key, &val_buf);
int cmp_val = rq->cmp.compare(&cmp_key, rq->keys[current]);
if (cmp_val >= 0 && rq->fold && !rq->in_fold) {
rq->in_fold = 1;
Expand Down

0 comments on commit 1c3763e

Please sign in to comment.