From 86d4d446d8fc4cb7bd0bdb766f36f465191ab721 Mon Sep 17 00:00:00 2001 From: "susumu.yata" Date: Tue, 11 Jun 2013 13:09:03 +0900 Subject: [PATCH] Implement grnxx::map::DoubleArray. --- lib/grnxx/map/double_array.cpp | 951 +++++++++++++++++++++++++- lib/grnxx/map/double_array.hpp | 83 ++- lib/grnxx/map/double_array/block.hpp | 10 + lib/grnxx/map/double_array/entry.hpp | 2 + lib/grnxx/map/double_array/header.cpp | 4 +- lib/grnxx/map/double_array/node.hpp | 29 +- 6 files changed, 1043 insertions(+), 36 deletions(-) diff --git a/lib/grnxx/map/double_array.cpp b/lib/grnxx/map/double_array.cpp index c6465c2..360d933 100644 --- a/lib/grnxx/map/double_array.cpp +++ b/lib/grnxx/map/double_array.cpp @@ -21,14 +21,32 @@ #include "grnxx/bytes.hpp" #include "grnxx/geo_point.hpp" +#include "grnxx/intrinsic.hpp" #include "grnxx/logger.hpp" +#include "grnxx/map/bytes_store.hpp" +#include "grnxx/map/double_array/block.hpp" +#include "grnxx/map/double_array/entry.hpp" #include "grnxx/map/double_array/header.hpp" +#include "grnxx/map/double_array/node.hpp" #include "grnxx/map/helper.hpp" #include "grnxx/storage.hpp" namespace grnxx { namespace map { +using double_array::BLOCK_MAX_FAILURE_COUNT; +using double_array::BLOCK_MAX_LEVEL; +using double_array::BLOCK_INVALID_ID; +using double_array::BLOCK_SIZE; +using double_array::BLOCK_MAX_COUNT; + +using double_array::NODE_TERMINAL_LABEL; +using double_array::NODE_MAX_LABEL; +using double_array::NODE_INVALID_LABEL; +using double_array::NODE_INVALID_OFFSET; + +constexpr uint64_t ROOT_NODE_ID = 0; + template Map *DoubleArray::create(Storage *, uint32_t, const MapOptions &) { GRNXX_ERROR() << "invalid combination"; @@ -55,7 +73,12 @@ template class DoubleArray; DoubleArray::DoubleArray() : storage_(nullptr), storage_node_id_(STORAGE_INVALID_NODE_ID), - header_(nullptr) {} + header_(nullptr), + nodes_(), + siblings_(), + blocks_(), + entries_(), + store_() {} DoubleArray::~DoubleArray() {} @@ -102,6 +125,339 @@ uint64_t DoubleArray::num_keys() const { return header_->num_keys; } +bool DoubleArray::get(int64_t key_id, Key *key) { + if ((key_id < MAP_MIN_KEY_ID) || (key_id > header_->max_key_id)) { + // Out of range. + return false; + } + return get_key(key_id, key) == DOUBLE_ARRAY_FOUND; +} + +bool DoubleArray::unset(int64_t key_id) { + Key key; + if (!get(key_id, &key)) { + // Not found or error. + return false; + } + return remove(key); +} + +bool DoubleArray::reset(int64_t key_id, KeyArg dest_key) { + Key src_key; + if (!get(key_id, &src_key)) { + // Not found or error. + return false; + } + return replace(src_key, dest_key); +} + +bool DoubleArray::find(KeyArg key, int64_t *key_id) { + uint64_t node_id = ROOT_NODE_ID; + Node node; + if (!nodes_->get(node_id, &node)) { + // Error. + return false; + } + uint64_t key_pos; + for (key_pos = 0; key_pos < key.size(); ++key_pos) { + if (node.is_leaf()) { + // Found. + break; + } + node_id = node.offset() ^ key[key_pos]; + if (!nodes_->get(node_id, &node)) { + // Error. + return false; + } + if (node.label() != key[key_pos]) { + // Not found. + return false; + } + } + if (!node.is_leaf()) { + if (node.child() != NODE_TERMINAL_LABEL) { + // Not found. + return false; + } + node_id = node.offset() ^ NODE_TERMINAL_LABEL; + if (!nodes_->get(node_id, &node)) { + // Error. + return false; + } + if (!node.is_leaf()) { + // Not found. + return false; + } + } + Key stored_key; + if (get_key(node.key_id(), &stored_key) != DOUBLE_ARRAY_FOUND) { + // Not found or error. + return false; + } + if (key.except_prefix(key_pos) != stored_key.except_prefix(key_pos)) { + // Not found. + return false; + } + if (key_id) { + *key_id = node.key_id(); + } + return true; +} + +bool DoubleArray::add(KeyArg key, int64_t *key_id) { + Node *node; + uint64_t key_pos; + if (find_leaf(key, &node, &key_pos) == DOUBLE_ARRAY_FAILED) { + // Error. + return false; + } + switch (insert_leaf(key, node, key_pos, &node)) { + case DOUBLE_ARRAY_FOUND: { + if (key_id) { + *key_id = node->key_id(); + } + return false; + } + case DOUBLE_ARRAY_INSERTED: { + break; + } + default: { + // Error. + return false; + } + } + const int64_t next_key_id = header_->next_key_id; + if (next_key_id > MAP_MAX_KEY_ID) { + // Error. + GRNXX_ERROR() << "too many keys: next_key_id = " << next_key_id + << ", max_key_id = " << MAP_MAX_KEY_ID; + return false; + } + Entry * const entry = entries_->get_pointer(next_key_id); + if (!entry) { + // Error. + return false; + } + uint64_t bytes_id; + if (!store_->add(key, &bytes_id)) { + // Error. + return false; + } + ++header_->num_keys; + if (next_key_id > header_->max_key_id) { + header_->max_key_id = next_key_id; + header_->next_key_id = next_key_id + 1; + } else { + header_->next_key_id = entry->next(); + } + *entry = Entry::valid_entry(bytes_id); + node->set_key_id(next_key_id); + if (key_id) { + *key_id = next_key_id; + } + return true; +} + +bool DoubleArray::remove(KeyArg key) { + Node *node; + uint64_t key_pos; + if (find_leaf(key, &node, &key_pos) != DOUBLE_ARRAY_FOUND) { + // Not found or error. + return false; + } + Entry * const entry = entries_->get_pointer(node->key_id()); + if (!entry) { + // Error. + return false; + } + if (!*entry) { + // Not found. + return false; + } + Key stored_key; + if (!store_->get(entry->bytes_id(), &stored_key)) { + // Error. + return false; + } + if (key.except_prefix(key_pos) != stored_key.except_prefix(key_pos)) { + // Not found. + return false; + } + *entry = Entry::invalid_entry(header_->next_key_id); + node->set_offset(NODE_INVALID_OFFSET); + header_->next_key_id = node->key_id(); + --header_->num_keys; + return true; +} + +bool DoubleArray::replace(KeyArg src_key, KeyArg dest_key, + int64_t *key_id) { + int64_t src_key_id; + if (!find(src_key, &src_key_id)) { + // Not found or error. + return false; + } + if (!replace_key(src_key_id, src_key, dest_key)) { + // Found or error. + return false; + } + if (key_id) { + *key_id = src_key_id; + } + return true; +} + +bool DoubleArray::truncate() { + Node * const node = nodes_->get_pointer(ROOT_NODE_ID); + if (!node) { + // Error. + return false; + } + node->set_child(NODE_INVALID_LABEL); + node->set_offset(NODE_INVALID_OFFSET); + header_->max_key_id = MAP_MIN_KEY_ID - 1; + header_->num_keys = 0; + header_->next_key_id = MAP_MIN_KEY_ID; + return true; +} + +bool DoubleArray::find_longest_prefix_match(KeyArg query, + int64_t *key_id, + Key *key) { + bool found = false; + uint64_t node_id = ROOT_NODE_ID; + Node node; + if (!nodes_->get(node_id, &node)) { + // Error. + return false; + } + uint64_t query_pos; + for (query_pos = 0; query_pos < query.size(); ++query_pos) { + if (node.is_leaf()) { + Key stored_key; + switch (get_key(node.key_id(), &stored_key)) { + case DOUBLE_ARRAY_FOUND: { + if ((stored_key.size() <= query.size()) && + (stored_key.except_prefix(query_pos) == + query.prefix(stored_key.size()).except_prefix(query_pos))) { + if (key_id) { + *key_id = node.key_id(); + } + if (key) { + *key = stored_key; + } + found = true; + } + break; + } + case DOUBLE_ARRAY_NOT_FOUND: { + break; + } + default: { + // Error. + return false; + } + } + return found; + } + + if (node.child() == NODE_TERMINAL_LABEL) { + Node leaf_node; + if (!nodes_->get(node.offset() ^ NODE_TERMINAL_LABEL, &leaf_node)) { + // Error. + return false; + } + if (leaf_node.is_leaf()) { + switch (get_key(leaf_node.key_id(), key)) { + case DOUBLE_ARRAY_FOUND: { + if (key_id) { + *key_id = leaf_node.key_id(); + } + found = true; + break; + } + case DOUBLE_ARRAY_NOT_FOUND: { + break; + } + default: { + return false; + } + } + } + } + + node_id = node.offset() ^ query[query_pos]; + if (!nodes_->get(node_id, &node)) { + // Error. + return false; + } + if (node.label() != query[query_pos]) { + return found; + } + } + + if (node.is_leaf()) { + Key stored_key; + switch (get_key(node.key_id(), &stored_key)) { + case DOUBLE_ARRAY_FOUND: { + if (stored_key.size() <= query.size()) { + if (key_id) { + *key_id = node.key_id(); + } + if (key) { + *key = stored_key; + } + found = true; + } + break; + } + case DOUBLE_ARRAY_NOT_FOUND: { + break; + } + default: { + return false; + } + } + } else if (node.child() == NODE_TERMINAL_LABEL) { + if (nodes_->get(node.offset() ^ NODE_TERMINAL_LABEL, &node)) { + switch (get_key(node.key_id(), key)) { + case DOUBLE_ARRAY_FOUND: { + if (key_id) { + *key_id = node.key_id(); + } + found = true; + break; + } + case DOUBLE_ARRAY_NOT_FOUND: { + break; + } + default: { + return false; + } + } + } + } + return found; +} + +//MapCursor *DoubleArray::create_cursor( +// MapCursorAllKeys query, const MapCursorOptions &options) { +// // TODO +// return nullptr; +//} + +//MapCursor *DoubleArray::create_cursor( +// const MapCursorKeyIDRange &query, const MapCursorOptions &options) { +// // TODO +// return nullptr; +//} + +//MapCursor *DoubleArray::create_cursor( +// const MapCursorKeyRange &query, const MapCursorOptions &options) { +// // TODO +// return nullptr; +//} + bool DoubleArray::create_map(Storage *storage, uint32_t storage_node_id, const MapOptions &) { storage_ = storage; @@ -113,13 +469,33 @@ bool DoubleArray::create_map(Storage *storage, uint32_t storage_node_id, storage_node_id_ = storage_node.id(); header_ = static_cast
(storage_node.body()); *header_ = Header(); - // TODO - return false; + nodes_.reset(NodeArray::create(storage, storage_node_id_)); + siblings_.reset(SiblingArray::create(storage, storage_node_id_)); + blocks_.reset(BlockArray::create(storage, storage_node_id_)); + entries_.reset(EntryArray::create(storage, storage_node_id_)); + store_.reset(BytesStore::create(storage, storage_node_id_)); + if (!nodes_ || !siblings_ || !blocks_ || !entries_ || !store_) { + storage->unlink_node(storage_node_id_); + return false; + } + header_->nodes_storage_node_id = nodes_->storage_node_id(); + header_->siblings_storage_node_id = siblings_->storage_node_id(); + header_->blocks_storage_node_id = blocks_->storage_node_id(); + header_->entries_storage_node_id = entries_->storage_node_id(); + header_->store_storage_node_id = store_->storage_node_id(); + Node * const root_node = reserve_node(ROOT_NODE_ID); + if (!root_node) { + storage->unlink_node(storage_node_id_); + return false; + } + root_node[NODE_INVALID_OFFSET - ROOT_NODE_ID].set_is_origin(true); + return true; } bool DoubleArray::open_map(Storage *storage, uint32_t storage_node_id) { storage_ = storage; - StorageNode storage_node = storage->open_node(storage_node_id); + storage_node_id_ = storage_node_id; + StorageNode storage_node = storage->open_node(storage_node_id_); if (!storage_node) { return false; } @@ -128,10 +504,571 @@ bool DoubleArray::open_map(Storage *storage, uint32_t storage_node_id) { << ", header_size = " << sizeof(Header); return false; } - storage_node_id_ = storage_node_id; header_ = static_cast
(storage_node.body()); - // TODO - return false; + nodes_.reset(NodeArray::open(storage, header_->nodes_storage_node_id)); + siblings_.reset( + SiblingArray::open(storage, header_->siblings_storage_node_id)); + blocks_.reset(BlockArray::open(storage, header_->blocks_storage_node_id)); + entries_.reset(EntryArray::open(storage, header_->entries_storage_node_id)); + store_.reset(BytesStore::open(storage, header_->store_storage_node_id)); + if (!nodes_ || !siblings_ || !blocks_ || !entries_ || !store_) { + return false; + } + return true; +} + +DoubleArrayResult DoubleArray::get_key(int64_t key_id, Key *key) { + Entry entry; + if (!entries_->get(key_id, &entry)) { + return DOUBLE_ARRAY_FAILED; + } + if (!entry) { + return DOUBLE_ARRAY_NOT_FOUND; + } + if (!store_->get(entry.bytes_id(), key)) { + return DOUBLE_ARRAY_FAILED; + } + return DOUBLE_ARRAY_FOUND; +} + +bool DoubleArray::replace_key(int64_t key_id, KeyArg src_key, + KeyArg dest_key) { + Node *dest_node; + uint64_t key_pos; + if (find_leaf(dest_key, &dest_node, &key_pos) == DOUBLE_ARRAY_FAILED) { + return false; + } + switch (insert_leaf(dest_key, dest_node, key_pos, &dest_node)) { + case DOUBLE_ARRAY_FOUND: { + return false; + } + case DOUBLE_ARRAY_INSERTED: { + break; + } + default: { + // Error. + return false; + } + } + Entry * const entry = entries_->get_pointer(key_id); + if (!entry) { + // Error. + return false; + } + Node *src_node; + if (find_leaf(src_key, &src_node, &key_pos) != DOUBLE_ARRAY_FOUND) { + // Critical error. + return false; + } + uint64_t bytes_id; + if (!store_->add(dest_key, &bytes_id)) { + // Error. + return false; + } + dest_node->set_key_id(key_id); + *entry = Entry::valid_entry(bytes_id); + src_node->set_offset(NODE_INVALID_OFFSET); + return true; +} + +DoubleArrayResult DoubleArray::find_leaf(KeyArg key, Node **leaf_node, + uint64_t *leaf_key_pos) { + Node *node = nodes_->get_pointer(ROOT_NODE_ID); + if (!node) { + return DOUBLE_ARRAY_FAILED; + } + uint64_t key_pos; + for (key_pos = 0; key_pos < key.size(); ++key_pos) { + if (node->is_leaf()) { + *leaf_node = node; + *leaf_key_pos = key_pos; + return DOUBLE_ARRAY_FOUND; + } + const uint64_t child_node_id = node->offset() ^ key[key_pos]; + Node * const child_node = nodes_->get_pointer(child_node_id); + if (!child_node) { + return DOUBLE_ARRAY_FAILED; + } + if (child_node->label() != key[key_pos]) { + *leaf_node = node; + *leaf_key_pos = key_pos; + return DOUBLE_ARRAY_NOT_FOUND; + } + node = child_node; + } + *leaf_node = node; + *leaf_key_pos = key_pos; + if (node->is_leaf()) { + return DOUBLE_ARRAY_FOUND; + } + if (node->child() != NODE_TERMINAL_LABEL) { + return DOUBLE_ARRAY_NOT_FOUND; + } + const uint64_t node_id = node->offset() ^ NODE_TERMINAL_LABEL; + node = nodes_->get_pointer(node_id); + if (!node) { + return DOUBLE_ARRAY_FAILED; + } + *leaf_node = node; + return node->is_leaf() ? DOUBLE_ARRAY_FOUND : DOUBLE_ARRAY_NOT_FOUND; +} + +DoubleArrayResult DoubleArray::insert_leaf(KeyArg key, Node *node, + uint64_t key_pos, + Node **leaf_node) { + if (node->is_leaf()) { + Key stored_key; + const DoubleArrayResult result = get_key(node->key_id(), &stored_key); + if (result != DOUBLE_ARRAY_FOUND) { + // Not found or error. + return result; + } + uint64_t i = key_pos; + while ((i < key.size()) && (i < stored_key.size())) { + if (key[i] != stored_key[i]) { + break; + } + ++i; + } + if ((i == key.size()) && (i == stored_key.size())) { + return DOUBLE_ARRAY_FOUND; + } + while (key_pos < i) { + if (!insert_node(node, key[key_pos], &node)) { + return DOUBLE_ARRAY_FAILED; + } + ++key_pos; + } + uint64_t labels[2]; + labels[0] = (key_pos < stored_key.size()) ? + stored_key[key_pos] : NODE_TERMINAL_LABEL; + labels[1] = (key_pos < key.size()) ? key[key_pos] : NODE_TERMINAL_LABEL; + if (!separate(node, labels, leaf_node)) { + return DOUBLE_ARRAY_FAILED; + } + return DOUBLE_ARRAY_INSERTED; + } else if (node->label() == NODE_TERMINAL_LABEL) { + *leaf_node = node; + return DOUBLE_ARRAY_INSERTED; + } else { + const uint64_t label = (key_pos < key.size()) ? + key[key_pos] : NODE_TERMINAL_LABEL; + if (!resolve(node, label)) { + return DOUBLE_ARRAY_FAILED; + } + if (!insert_node(node, label, leaf_node)) { + return DOUBLE_ARRAY_FAILED; + } + return DOUBLE_ARRAY_INSERTED; + } +} + +bool DoubleArray::insert_node(Node *node, uint64_t label, + Node **dest_node) { + uint64_t offset = node->offset(); + if (node->is_leaf() || (offset == NODE_INVALID_OFFSET)) { + if (!find_offset(&label, 1, &offset)) { + // Error. + return false; + } + } + const uint64_t next_node_id = offset ^ label; + uint8_t *next_sibling = siblings_->get_pointer(next_node_id); + if (!next_sibling) { + // Error. + return false; + } + Node * const next_node = reserve_node(next_node_id); + if (!next_node) { + // Error. + return false; + } + next_node->set_label(label); + if (node->is_leaf()) { + next_node[offset - next_node_id].set_is_origin(true); + next_node->set_key_id(node->key_id()); + } else if (node->offset() == NODE_INVALID_OFFSET) { + next_node[offset - next_node_id].set_is_origin(true); + } + node->set_offset(offset); + const uint64_t child_label = node->child(); + if (child_label == NODE_INVALID_LABEL) { + node->set_child(label); + } else if ((label == NODE_TERMINAL_LABEL) || + ((child_label != NODE_TERMINAL_LABEL) && (label < child_label))) { + // The child node becomes the first child. + *next_sibling = child_label; + next_node->set_has_sibling(); + node->set_child(label); + } else { + uint64_t prev_node_id = offset ^ child_label; + Node *prev_node = &next_node[prev_node_id - next_node_id]; + uint8_t *prev_sibling = &next_sibling[prev_node_id - next_node_id]; + uint64_t sibling_label = prev_node->has_sibling() ? + *prev_sibling : NODE_INVALID_LABEL; + while (label > sibling_label) { + prev_node_id = offset ^ sibling_label; + prev_node = &next_node[prev_node_id - next_node_id]; + prev_sibling = &next_sibling[prev_node_id - next_node_id]; + sibling_label = prev_node->has_sibling() ? + *prev_sibling : NODE_INVALID_LABEL; + } + *next_sibling = *prev_sibling; + *prev_sibling = label; + if (prev_node->has_sibling()) { + next_node->set_has_sibling(); + } + prev_node->set_has_sibling(); + } + *dest_node = next_node; + return true; +} + +bool DoubleArray::separate(Node *node, uint64_t labels[2], + Node **dest_node) { + uint64_t offset; + if (!find_offset(labels, 2, &offset)) { + // Error. + return false; + } + uint64_t node_ids[2] = { offset ^ labels[0], offset ^ labels[1] }; + Node *nodes[2]; + nodes[0] = reserve_node(node_ids[0]); + if (!nodes[0]) { + // Error. + return false; + } + nodes[1] = reserve_node(node_ids[1]); + if (!nodes[1]) { + // Error. + return false; + } + uint8_t * const sibling_block = + siblings_->get_pointer(offset & ~(BLOCK_SIZE - 1)); + if (!sibling_block) { + // Error. + return false; + } + nodes[0]->set_label(labels[0]); + nodes[0]->set_key_id(node->key_id()); + nodes[1]->set_label(labels[1]); + nodes[0][offset - node_ids[0]].set_is_origin(true); + node->set_offset(offset); + if ((labels[0] == NODE_TERMINAL_LABEL) || + ((labels[1] != NODE_TERMINAL_LABEL) && (labels[0] < labels[1]))) { + sibling_block[node_ids[0] % BLOCK_SIZE] = static_cast(labels[1]); + nodes[0]->set_has_sibling(); + node->set_child(labels[0]); + } else { + sibling_block[node_ids[1] % BLOCK_SIZE] = static_cast(labels[0]); + nodes[1]->set_has_sibling(); + node->set_child(labels[1]); + } + *dest_node = nodes[1]; + return true; +} + +bool DoubleArray::resolve(Node *node, uint64_t label) { + uint64_t offset = node->offset(); + if (offset == NODE_INVALID_OFFSET) { + return true; + } + uint64_t dest_node_id = offset ^ label; + Node * const dest_node = nodes_->get_pointer(dest_node_id); + if (!dest_node) { + // Error. + return false; + } + if (dest_node->is_phantom()) { + return true; + } + Node * const node_block = dest_node - (dest_node_id % BLOCK_SIZE); + uint8_t * const sibling_block = + siblings_->get_pointer(dest_node_id & ~(BLOCK_SIZE - 1)); + if (!sibling_block) { + // Error. + return false; + } + uint64_t labels[NODE_MAX_LABEL + 1]; + uint64_t num_labels = 0; + uint64_t child_label = node->child(); + while (child_label != NODE_INVALID_LABEL) { + labels[num_labels++] = child_label; + const uint64_t child_node_id = offset ^ child_label; + if (node_block[child_node_id % BLOCK_SIZE].has_sibling()) { + child_label = sibling_block[child_node_id % BLOCK_SIZE]; + } else { + child_label = NODE_INVALID_LABEL; + } + } + labels[num_labels] = label; + if (!find_offset(labels, num_labels + 1, &offset)) { + // Error. + return false; + } + if (!migrate_nodes(node, offset, labels, num_labels)) { + // Error. + return false; + } + return true; +} + +bool DoubleArray::migrate_nodes(Node *node, uint64_t dest_offset, + const uint64_t *labels, + uint64_t num_labels) { + const uint64_t src_offset = node->offset(); + Node * const src_node_block = + nodes_->get_pointer(src_offset & ~(BLOCK_SIZE - 1)); + if (!src_node_block) { + // Error. + return false; + } + uint8_t * const src_sibling_block = + siblings_->get_pointer(src_offset & ~(BLOCK_SIZE - 1)); + if (!src_sibling_block) { + // Error. + return false; + } + Node * const dest_node_block = + nodes_->get_pointer(dest_offset & ~(BLOCK_SIZE - 1)); + if (!dest_node_block) { + // Error. + return false; + } + uint8_t * const dest_sibling_block = + siblings_->get_pointer(dest_offset & ~(BLOCK_SIZE - 1)); + if (!dest_sibling_block) { + // Error. + return false; + } + // TODO: Copy siblings! + for (uint64_t i = 0; i < num_labels; ++i) { + const uint64_t src_node_id = src_offset ^ labels[i]; + Node * const src_node = &src_node_block[src_node_id % BLOCK_SIZE]; + uint8_t * const src_sibling = &src_sibling_block[src_node_id % BLOCK_SIZE]; + const uint64_t dest_node_id = dest_offset ^ labels[i]; + Node * const dest_node = reserve_node(dest_node_id); + if (!dest_node) { + // Error. + return false; + } + uint8_t * const dest_sibling = + &dest_sibling_block[dest_node_id % BLOCK_SIZE]; + Node dummy_node = *src_node; + dummy_node.set_is_origin(dest_node->is_origin()); + *dest_node = dummy_node; + *dest_sibling = *src_sibling; + } + header_->num_zombies += num_labels; + dest_node_block[dest_offset % BLOCK_SIZE].set_is_origin(true); + node->set_offset(dest_offset); + return true; +} + +bool DoubleArray::find_offset(const uint64_t *labels, + uint64_t num_labels, + uint64_t *found_offset) { + // Blocks are tested in descending order of level. + // Generally, a lower level contains more phantom nodes. + uint64_t level = bit_scan_reverse(num_labels) + 1; + level = (level < BLOCK_MAX_LEVEL) ? (BLOCK_MAX_LEVEL - level) : 0; + uint64_t block_count = 0; + do { + uint64_t latest_block_id = header_->latest_blocks[level]; + if (latest_block_id == BLOCK_INVALID_ID) { + // This level group is skipped because it is empty. + continue; + } + uint64_t block_id = latest_block_id; + do { + Block * const block = blocks_->get_pointer(block_id); + if (!block) { + // Error. + return false; + } + Node * const node_block = nodes_->get_pointer(block_id * BLOCK_SIZE); + if (!node_block) { + // Error. + return false; + } + const uint64_t first_phantom_node_id = block->first_phantom(); + uint64_t node_id = first_phantom_node_id; + do { + const uint64_t offset = node_id ^ labels[0]; + if (!node_block[offset].is_origin()) { + uint64_t i; + for (i = 1; i < num_labels; ++i) { + if (!node_block[offset ^ labels[i]].is_phantom()) { + break; + } + } + if (i >= num_labels) { + // Found. + *found_offset = (block_id * BLOCK_SIZE) | offset; + return true; + } + } + node_id = node_block[node_id].next(); + } while (node_id != first_phantom_node_id); + + Block * const prev_block = block; + const uint64_t prev_block_id = block_id; + const uint64_t next_block_id = block->next(); + block_id = next_block_id; + + // A block level is updated if this function fails many times. + prev_block->set_failure_count(prev_block->failure_count() + 1); + if (prev_block->failure_count() >= BLOCK_MAX_FAILURE_COUNT) { + update_block_level(prev_block_id, prev_block, level + 1); + if (next_block_id == latest_block_id) { + // All the blocks are tested. + break; + } else { + latest_block_id = header_->latest_blocks[level]; + continue; + } + } + } while ((++block_count < BLOCK_MAX_COUNT) && + (block_id != latest_block_id)); + } while ((block_count < BLOCK_MAX_COUNT) && (level-- != 0)); + // Use a new block. + *found_offset = (header_->num_blocks * BLOCK_SIZE) ^ labels[0]; + return true; +} + +auto DoubleArray::reserve_node(uint64_t node_id) -> Node * { + const uint64_t block_id = node_id / BLOCK_SIZE; + Block *block; + if (node_id >= (header_->num_blocks * BLOCK_SIZE)) { + block = reserve_block(block_id); + } else { + block = blocks_->get_pointer(block_id); + } + if (!block) { + // Error. + return nullptr; + } + Node * const node = nodes_->get_pointer(node_id); + if (!node) { + // Error. + return nullptr; + } + Node * const node_block = node - (node_id % BLOCK_SIZE); + Node * const next_node = &node_block[node->next()]; + Node * const prev_node = &node_block[node->prev()]; + if ((node_id % BLOCK_SIZE) == block->first_phantom()) { + block->set_first_phantom(node->next()); + } + prev_node->set_next(node->next()); + next_node->set_prev(node->prev()); + if (block->level() != BLOCK_MAX_LEVEL) { + const uint64_t threshold = + 1ULL << ((BLOCK_MAX_LEVEL - block->level() - 1) * 2); + if (block->num_phantoms() == threshold) { + update_block_level(block_id, block, block->level() + 1); + } + } + block->set_num_phantoms(block->num_phantoms() - 1); + node->unset_is_phantom(); + --header_->num_phantoms; + return node; +} + +auto DoubleArray::reserve_block(uint64_t block_id) -> Block * { + if (block_id >= blocks_->size()) { + GRNXX_ERROR() << "too many blocks: block_id = " << block_id + << ", blocks_size = " << blocks_->size(); + return nullptr; + } + Block * const block = blocks_->get_pointer(block_id); + if (!block) { + // Error. + return nullptr; + } + Node * const node = nodes_->get_pointer(block_id * BLOCK_SIZE); + if (!node) { + // Error. + return nullptr; + } + *block = Block::empty_block(); + for (uint64_t i = 0; i < BLOCK_SIZE; ++i) { + node[i] = Node::phantom_node(i + 1, i - 1); + } + // The level of a new block is 0. + set_block_level(block_id, block, 0); + header_->num_blocks = block_id + 1; + header_->num_phantoms += BLOCK_SIZE; + return block; +} + +bool DoubleArray::update_block_level(uint64_t block_id, Block *block, + uint64_t level) { + // TODO: If set_block_level() fails, the block gets lost. + if (!unset_block_level(block_id, block) || + !set_block_level(block_id, block, level)) { + // Error. + return false; + } + return true; +} + +bool DoubleArray::set_block_level(uint64_t block_id, Block *block, + uint64_t level) { + if (header_->latest_blocks[level] == BLOCK_INVALID_ID) { + // The block becomes the only one member of the level group. + block->set_next(block_id); + block->set_prev(block_id); + header_->latest_blocks[level] = block_id; + } else { + // The block is appended to the level group. + const uint64_t next_block_id = header_->latest_blocks[level]; + Block * const next_block = blocks_->get_pointer(next_block_id); + if (!next_block) { + // Error. + return false; + } + const uint64_t prev_block_id = next_block->prev(); + Block * const prev_block = blocks_->get_pointer(prev_block_id); + if (!prev_block) { + // Error. + return false; + } + block->set_next(next_block_id); + block->set_prev(prev_block_id); + prev_block->set_next(block_id); + next_block->set_prev(block_id); + } + block->set_level(level); + block->set_failure_count(0); + return true; +} + +bool DoubleArray::unset_block_level(uint64_t block_id, Block *block) { + const uint64_t level = block->level(); + const uint64_t next_block_id = block->next(); + const uint64_t prev_block_id = block->prev(); + if (next_block_id == prev_block_id) { + // The level group becomes empty. + header_->latest_blocks[level] = BLOCK_INVALID_ID; + } else { + Block * const next_block = blocks_->get_pointer(next_block_id); + if (!next_block) { + // Error. + return false; + } + Block * const prev_block = blocks_->get_pointer(prev_block_id); + if (!prev_block) { + // Error. + return false; + } + prev_block->set_next(next_block_id); + next_block->set_prev(prev_block_id); + if (block_id == header_->latest_blocks[level]) { + // The next block becomes the latest block of the level group. + header_->latest_blocks[level] = next_block_id; + } + } + return true; } } // namespace map diff --git a/lib/grnxx/map/double_array.hpp b/lib/grnxx/map/double_array.hpp index b01b9f8..539487e 100644 --- a/lib/grnxx/map/double_array.hpp +++ b/lib/grnxx/map/double_array.hpp @@ -22,10 +22,14 @@ #include +#include "grnxx/array.hpp" #include "grnxx/bytes.hpp" #include "grnxx/map.hpp" #include "grnxx/map_cursor.hpp" #include "grnxx/map_cursor_query.hpp" +#include "grnxx/map/double_array/block.hpp" +#include "grnxx/map/double_array/entry.hpp" +#include "grnxx/map/double_array/node.hpp" #include "grnxx/types.hpp" namespace grnxx { @@ -33,12 +37,22 @@ namespace grnxx { class Storage; namespace map { + +class BytesStore; + namespace double_array { struct Header; } // namespace double_array +enum DoubleArrayResult { + DOUBLE_ARRAY_FOUND, + DOUBLE_ARRAY_NOT_FOUND, + DOUBLE_ARRAY_INSERTED, + DOUBLE_ARRAY_FAILED +}; + template class DoubleArray { public: @@ -49,6 +63,15 @@ class DoubleArray { template <> class DoubleArray : public Map { + using Node = double_array::Node; + using Block = double_array::Block; + using Entry = double_array::Entry; + + using NodeArray = Array; // 42-bit + using SiblingArray = Array; // 42-bit + using BlockArray = Array; // 33-bit + using EntryArray = Array; // 40-bit + public: using Header = double_array::Header; using Key = typename Map::Key; @@ -68,40 +91,70 @@ class DoubleArray : public Map { int64_t max_key_id() const; uint64_t num_keys() const; - // TODO -// bool get(int64_t key_id, Key *key = nullptr); -// bool unset(int64_t key_id); -// bool reset(int64_t key_id, KeyArg dest_key); + bool get(int64_t key_id, Key *key = nullptr); + bool unset(int64_t key_id); + bool reset(int64_t key_id, KeyArg dest_key); -// bool find(KeyArg key, int64_t *key_id = nullptr); -// bool add(KeyArg key, int64_t *key_id = nullptr); -// bool remove(KeyArg key); -// bool replace(KeyArg src_key, KeyArg dest_key, int64_t *key_id = nullptr); + bool find(KeyArg key, int64_t *key_id = nullptr); + bool add(KeyArg key, int64_t *key_id = nullptr); + bool remove(KeyArg key); + bool replace(KeyArg src_key, KeyArg dest_key, int64_t *key_id = nullptr); -// bool truncate(); + bool truncate(); -// bool find_longest_prefix_match(KeyArg query, -// int64_t *key_id = nullptr, -// Key *key = nullptr); + bool find_longest_prefix_match(KeyArg query, + int64_t *key_id = nullptr, + Key *key = nullptr); // Cursor *create_cursor( -// MapCursorAllKeys query, +// MapCursorAllKeys query, // const MapCursorOptions &options = MapCursorOptions()); // Cursor *create_cursor( -// const MapCursorKeyIDRange &query, +// const MapCursorKeyIDRange &query, // const MapCursorOptions &options = MapCursorOptions()); // Cursor *create_cursor( -// const MapCursorKeyRange &query, +// const MapCursorKeyRange &query, // const MapCursorOptions &options = MapCursorOptions()); private: Storage *storage_; uint32_t storage_node_id_; Header *header_; + std::unique_ptr nodes_; + std::unique_ptr siblings_; + std::unique_ptr blocks_; + std::unique_ptr entries_; + std::unique_ptr store_; bool create_map(Storage *storage, uint32_t storage_node_id, const MapOptions &options); bool open_map(Storage *storage, uint32_t storage_node_id); + + DoubleArrayResult get_key(int64_t key_id, Key *key); + + bool replace_key(int64_t key_id, KeyArg src_key, KeyArg dest_key); + + DoubleArrayResult find_leaf(KeyArg key, Node **leaf_node, + uint64_t *leaf_key_pos); + DoubleArrayResult insert_leaf(KeyArg key, Node *node, uint64_t key_pos, + Node **leaf_node); + + bool insert_node(Node *node, uint64_t label, Node **dest_node); + bool separate(Node *node, uint64_t labels[2], Node **dest_node); + + bool resolve(Node *node, uint64_t label); + bool migrate_nodes(Node *node, uint64_t dest_offset, + const uint64_t *labels, uint64_t num_labels); + + bool find_offset(const uint64_t *labels, uint64_t num_labels, + uint64_t *found_offset); + + Node *reserve_node(uint64_t node_id); + Block *reserve_block(uint64_t block_id); + + bool update_block_level(uint64_t block_id, Block *block, uint64_t level); + bool set_block_level(uint64_t block_id, Block *block, uint64_t level); + bool unset_block_level(uint64_t block_id, Block *block); }; } // namespace map diff --git a/lib/grnxx/map/double_array/block.hpp b/lib/grnxx/map/double_array/block.hpp index 5da7c82..6db87e4 100644 --- a/lib/grnxx/map/double_array/block.hpp +++ b/lib/grnxx/map/double_array/block.hpp @@ -29,6 +29,8 @@ namespace double_array { constexpr uint64_t BLOCK_MAX_FAILURE_COUNT = 4; constexpr uint64_t BLOCK_MAX_LEVEL = 5; constexpr uint64_t BLOCK_INVALID_ID = (1ULL << 40) - 1; +constexpr uint64_t BLOCK_SIZE = 1ULL << 9; +constexpr uint64_t BLOCK_MAX_COUNT = 16; // The internal structure is as follows: // - values_[0] @@ -59,6 +61,12 @@ class Block { static constexpr uint8_t PREV_SHIFT = 24; public: + Block() = default; + + static Block empty_block() { + return Block(0, BLOCK_SIZE << NUM_PHANTOMS_SHIFT); + } + // Return the first phantom node. uint64_t first_phantom() const { return (values_[0] >> FIRST_PHANTOM_SHIFT) & FIRST_PHANTOM_MASK; @@ -113,6 +121,8 @@ class Block { private: uint64_t values_[2]; + + Block(uint64_t value_0, uint64_t value_1) : values_{ value_0, value_1 } {} }; } // namespace double_array diff --git a/lib/grnxx/map/double_array/entry.hpp b/lib/grnxx/map/double_array/entry.hpp index 9307a94..fcc7666 100644 --- a/lib/grnxx/map/double_array/entry.hpp +++ b/lib/grnxx/map/double_array/entry.hpp @@ -39,6 +39,8 @@ class Entry { static constexpr uint64_t IS_VALID_FLAG = 1ULL << 63; public: + Entry() = default; + static Entry valid_entry(uint64_t bytes_id) { return Entry(IS_VALID_FLAG | bytes_id); } diff --git a/lib/grnxx/map/double_array/header.cpp b/lib/grnxx/map/double_array/header.cpp index d33d436..96f4989 100644 --- a/lib/grnxx/map/double_array/header.cpp +++ b/lib/grnxx/map/double_array/header.cpp @@ -24,7 +24,7 @@ namespace map { namespace double_array { Header::Header() - : map_type(MAP_ARRAY), + : map_type(MAP_DOUBLE_ARRAY), max_key_id(MAP_MIN_KEY_ID - 1), num_keys(0), nodes_storage_node_id(STORAGE_INVALID_NODE_ID), @@ -32,7 +32,7 @@ Header::Header() blocks_storage_node_id(STORAGE_INVALID_NODE_ID), entries_storage_node_id(STORAGE_INVALID_NODE_ID), store_storage_node_id(STORAGE_INVALID_NODE_ID), - next_key_id(MAP_INVALID_KEY_ID), + next_key_id(MAP_MIN_KEY_ID), num_blocks(0), num_phantoms(0), num_zombies(0), diff --git a/lib/grnxx/map/double_array/node.hpp b/lib/grnxx/map/double_array/node.hpp index ed0ec73..69917bc 100644 --- a/lib/grnxx/map/double_array/node.hpp +++ b/lib/grnxx/map/double_array/node.hpp @@ -29,7 +29,6 @@ namespace double_array { constexpr uint64_t NODE_TERMINAL_LABEL = 0x100; constexpr uint64_t NODE_MAX_LABEL = NODE_TERMINAL_LABEL; constexpr uint64_t NODE_INVALID_LABEL = NODE_MAX_LABEL + 1; - constexpr uint64_t NODE_INVALID_OFFSET = 0; // The internal structure is as follows: @@ -75,6 +74,8 @@ class Node { static constexpr uint8_t OFFSET_SHIFT = 18; public: + Node() = default; + // Create a phantom node. static Node phantom_node(uint64_t next, uint64_t prev) { return Node(IS_PHANTOM_FLAG | ((next & NEXT_MASK) << NEXT_SHIFT) | @@ -134,8 +135,12 @@ class Node { (NODE_INVALID_LABEL << CHILD_SHIFT) | (NODE_INVALID_OFFSET << OFFSET_SHIFT); } - void set_is_origin() { - value_ |= IS_ORIGIN_FLAG; + void set_is_origin(bool is_origin) { + if (is_origin) { + value_ |= IS_ORIGIN_FLAG; + } else { + value_ &= ~IS_ORIGIN_FLAG; + } } void set_next(uint64_t next) { @@ -172,16 +177,16 @@ class Node { value_ = (value_ & ~(CHILD_MASK << CHILD_SHIFT)) | ((child & CHILD_MASK) << CHILD_SHIFT); } - // Use unset_offset() iff "offset" == INVALID_OFFSET. void set_offset(uint64_t offset) { - value_ = (value_ & ~(OFFSET_MASK << OFFSET_SHIFT)) | - ((offset & OFFSET_MASK) << OFFSET_SHIFT); - } - void unset_offset() { - value_ = (value_ & (IS_ORIGIN_FLAG | (LABEL_MASK << LABEL_SHIFT) | - IS_LEAF_FLAG)) | - (NODE_INVALID_LABEL << CHILD_SHIFT) | - (NODE_INVALID_OFFSET << OFFSET_SHIFT); + if (value_ & IS_LEAF_FLAG) { + value_ = (value_ & ~(IS_LEAF_FLAG | (OFFSET_MASK << OFFSET_SHIFT) | + (CHILD_MASK << CHILD_SHIFT))) | + (offset << OFFSET_SHIFT) | + (NODE_INVALID_LABEL << CHILD_SHIFT); + } else { + value_ = (value_ & ~(OFFSET_MASK << OFFSET_SHIFT)) | + (offset << OFFSET_SHIFT); + } } private: