Skip to content

Commit

Permalink
Feature/unique vpack index lookup performance improvements (#3087)
Browse files Browse the repository at this point in the history
* fixed issue #3075

* update the known issues document, as some issues have already been fixed

* speed up equality lookups in unique vpack indexes

* fixed wording
  • Loading branch information
jsteemann committed Aug 24, 2017
1 parent 4933eb4 commit 5c5b5e5
Show file tree
Hide file tree
Showing 9 changed files with 165 additions and 36 deletions.
2 changes: 1 addition & 1 deletion arangod/RocksDBEngine/RocksDBColumnFamily.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ struct RocksDBColumnFamily {

/// accessor for the stored `_edge` column family handle
static rocksdb::ColumnFamilyHandle* edge() {
  return _edge;
}

/// non unique vpack indexes (skiplist, permanent indexes)
/// unique and non-unique vpack indexes (skiplist, permanent indexes)
static rocksdb::ColumnFamilyHandle* vpack() {
  // hands out the stored `_vpack` column family handle
  return _vpack;
}

/// accessor for the stored `_geo` column family handle
static rocksdb::ColumnFamilyHandle* geo() {
  return _geo;
}
Expand Down
31 changes: 31 additions & 0 deletions arangod/RocksDBEngine/RocksDBKeyBounds.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,20 @@ RocksDBKeyBounds RocksDBKeyBounds::VPackIndex(uint64_t indexId,
right);
}

/// Range variant: builds bounds of type UniqueVPackIndexValue for the index
/// `indexId` from a left and a right border value. Used for seeking lookups.
RocksDBKeyBounds RocksDBKeyBounds::UniqueVPackIndex(uint64_t indexId,
                                                    VPackSlice const& left,
                                                    VPackSlice const& right) {
  RocksDBKeyBounds rangeBounds(RocksDBEntryType::UniqueVPackIndexValue,
                               indexId, left, right);
  return rangeBounds;
}

/// Point variant: builds bounds of type UniqueVPackIndexValue for the index
/// `indexId` from a single value. Used for point lookups.
RocksDBKeyBounds RocksDBKeyBounds::UniqueVPackIndex(uint64_t indexId,
                                                    VPackSlice const& left) {
  RocksDBKeyBounds pointBounds(RocksDBEntryType::UniqueVPackIndexValue,
                               indexId, left);
  return pointBounds;
}

/// Builds bounds of entry type View for the database `databaseId`.
RocksDBKeyBounds RocksDBKeyBounds::DatabaseViews(TRI_voc_tick_t databaseId) {
  RocksDBKeyBounds viewBounds(RocksDBEntryType::View, databaseId);
  return viewBounds;
}
Expand Down Expand Up @@ -371,6 +378,30 @@ RocksDBKeyBounds::RocksDBKeyBounds(RocksDBEntryType type, uint64_t first,
}
}

/// Point lookups for unique velocypack indexes.
///
/// Builds only the start key: the persisted form of `first` followed by the
/// raw bytes of `second`. The end bound is deliberately left empty.
/// Any entry type other than UniqueVPackIndexValue is rejected with
/// TRI_ERROR_BAD_PARAMETER.
RocksDBKeyBounds::RocksDBKeyBounds(RocksDBEntryType type, uint64_t first,
                                   VPackSlice const& second)
    : _type(type) {
  if (_type != RocksDBEntryType::UniqueVPackIndexValue) {
    // this constructor only knows how to build unique vpack index keys
    THROW_ARANGO_EXCEPTION(TRI_ERROR_BAD_PARAMETER);
  }

  size_t const valueLength = static_cast<size_t>(second.byteSize());

  // room for the 8-byte prefix plus the velocypack value
  _internals.reserve(sizeof(uint64_t) + valueLength);

  uint64ToPersistent(_internals.buffer(), first);
  _internals.buffer().append(reinterpret_cast<char const*>(second.begin()),
                             valueLength);

  // mark the end of the start key; the second bound is intentionally left empty!
  _internals.separate();
}

/// iterate over the specified bounds of the velocypack index
RocksDBKeyBounds::RocksDBKeyBounds(RocksDBEntryType type, uint64_t first,
VPackSlice const& second,
Expand Down
7 changes: 7 additions & 0 deletions arangod/RocksDBEngine/RocksDBKeyBounds.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,13 @@ class RocksDBKeyBounds {
static RocksDBKeyBounds UniqueVPackIndex(uint64_t indexId,
VPackSlice const& left,
VPackSlice const& right);

//////////////////////////////////////////////////////////////////////////////
/// @brief Bounds for a single, fully specified value belonging to a
/// specified unique index. This method is used for point lookups
//////////////////////////////////////////////////////////////////////////////
static RocksDBKeyBounds UniqueVPackIndex(uint64_t indexId,
VPackSlice const& left);

//////////////////////////////////////////////////////////////////////////////
/// @brief Bounds for all views belonging to a specified database
Expand Down
19 changes: 13 additions & 6 deletions arangod/RocksDBEngine/RocksDBMethods.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ void RocksDBSavePoint::rollback() {

// =================== RocksDBMethods ===================

/// Convenience overload: wraps the string representation of `key` in a
/// rocksdb::Slice and forwards to the Slice-based Get().
arangodb::Result RocksDBMethods::Get(rocksdb::ColumnFamilyHandle* cf,
                                     RocksDBKey const& key,
                                     std::string* val) {
  return Get(cf, rocksdb::Slice(key.string()), val);
}

/// Gives access to the read options stored in the transaction state.
rocksdb::ReadOptions const& RocksDBMethods::readOptions() {
  auto const& options = _state->_rocksReadOptions;
  return options;
}
Expand Down Expand Up @@ -119,12 +126,12 @@ bool RocksDBReadOnlyMethods::Exists(rocksdb::ColumnFamilyHandle* cf,
}

/// Looks up `key` in column family `cf` via `_db->Get` using the read options
/// of the transaction state (which must carry a snapshot) and stores the
/// value in `*val`. Returns an ok Result on success, otherwise the converted
/// RocksDB status.
/// NOTE(review): this hunk shows both the removed (RocksDBKey) and the added
/// (rocksdb::Slice) variant of the `key` parameter and of the lookup call.
arangodb::Result RocksDBReadOnlyMethods::Get(rocksdb::ColumnFamilyHandle* cf,
RocksDBKey const& key,
rocksdb::Slice const& key,
std::string* val) {
TRI_ASSERT(cf != nullptr);
rocksdb::ReadOptions const& ro = _state->_rocksReadOptions;
TRI_ASSERT(ro.snapshot != nullptr);
rocksdb::Status s = _db->Get(ro, cf, key.string(), val);
rocksdb::Status s = _db->Get(ro, cf, key, val);
return s.ok() ? arangodb::Result() : rocksutils::convertStatus(s);
}

Expand Down Expand Up @@ -161,12 +168,12 @@ bool RocksDBTrxMethods::Exists(rocksdb::ColumnFamilyHandle* cf,
}

/// Looks up `key` in column family `cf` through the current RocksDB
/// transaction (`_state->_rocksTransaction`) using the read options of the
/// transaction state (which must carry a snapshot) and stores the value in
/// `*val`. Returns an ok Result on success, otherwise the converted RocksDB
/// status.
/// NOTE(review): this hunk shows both the removed (RocksDBKey) and the added
/// (rocksdb::Slice) variant of the `key` parameter and of the lookup call.
arangodb::Result RocksDBTrxMethods::Get(rocksdb::ColumnFamilyHandle* cf,
RocksDBKey const& key,
rocksdb::Slice const& key,
std::string* val) {
TRI_ASSERT(cf != nullptr);
rocksdb::ReadOptions const& ro = _state->_rocksReadOptions;
TRI_ASSERT(ro.snapshot != nullptr);
rocksdb::Status s = _state->_rocksTransaction->Get(ro, cf, key.string(), val);
rocksdb::Status s = _state->_rocksTransaction->Get(ro, cf, key, val);
return s.ok() ? arangodb::Result() : rocksutils::convertStatus(s);
}

Expand Down Expand Up @@ -220,11 +227,11 @@ bool RocksDBBatchedMethods::Exists(rocksdb::ColumnFamilyHandle* cf,
}

/// Looks up `key` in column family `cf` via `_wb->GetFromBatchAndDB`, i.e. in
/// the wrapped write batch together with `_db`, using default-constructed
/// read options, and stores the value in `*val`. Returns an ok Result on
/// success, otherwise the converted RocksDB status.
/// NOTE(review): this hunk shows both the removed (RocksDBKey) and the added
/// (rocksdb::Slice) variant of the `key` parameter and of the lookup call.
arangodb::Result RocksDBBatchedMethods::Get(rocksdb::ColumnFamilyHandle* cf,
RocksDBKey const& key,
rocksdb::Slice const& key,
std::string* val) {
TRI_ASSERT(cf != nullptr);
rocksdb::ReadOptions ro;
rocksdb::Status s = _wb->GetFromBatchAndDB(_db, ro, cf, key.string(), val);
rocksdb::Status s = _wb->GetFromBatchAndDB(_db, ro, cf, key, val);
return s.ok() ? arangodb::Result() : rocksutils::convertStatus(s);
}

Expand Down
20 changes: 12 additions & 8 deletions arangod/RocksDBEngine/RocksDBMethods.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class RocksDBMethods {
rocksdb::ReadOptions const& readOptions();

virtual bool Exists(rocksdb::ColumnFamilyHandle*, RocksDBKey const&) = 0;
virtual arangodb::Result Get(rocksdb::ColumnFamilyHandle*, RocksDBKey const&,
virtual arangodb::Result Get(rocksdb::ColumnFamilyHandle*, rocksdb::Slice const&,
std::string*) = 0;
virtual arangodb::Result Put(
rocksdb::ColumnFamilyHandle*, RocksDBKey const&, rocksdb::Slice const&,
Expand All @@ -86,6 +86,11 @@ class RocksDBMethods {

virtual void SetSavePoint() = 0;
virtual arangodb::Result RollbackToSavePoint() = 0;

// convenience and compatibility method
arangodb::Result Get(rocksdb::ColumnFamilyHandle*, RocksDBKey const&,
std::string*);


#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
std::size_t countInBounds(RocksDBKeyBounds const& bounds, bool isElementInRange = false);
Expand All @@ -96,14 +101,13 @@ class RocksDBMethods {
};

// only implements GET and NewIterator
class RocksDBReadOnlyMethods : public RocksDBMethods {
class RocksDBReadOnlyMethods final : public RocksDBMethods {
public:
explicit RocksDBReadOnlyMethods(RocksDBTransactionState* state);

bool Exists(rocksdb::ColumnFamilyHandle*, RocksDBKey const&) override;
arangodb::Result Get(rocksdb::ColumnFamilyHandle*, RocksDBKey const& key,
arangodb::Result Get(rocksdb::ColumnFamilyHandle*, rocksdb::Slice const& key,
std::string* val) override;

arangodb::Result Put(
rocksdb::ColumnFamilyHandle*, RocksDBKey const& key,
rocksdb::Slice const& val,
Expand All @@ -122,12 +126,12 @@ class RocksDBReadOnlyMethods : public RocksDBMethods {
};

/// transaction wrapper, uses the current rocksdb transaction
class RocksDBTrxMethods : public RocksDBMethods {
class RocksDBTrxMethods final : public RocksDBMethods {
public:
explicit RocksDBTrxMethods(RocksDBTransactionState* state);

bool Exists(rocksdb::ColumnFamilyHandle*, RocksDBKey const&) override;
arangodb::Result Get(rocksdb::ColumnFamilyHandle*, RocksDBKey const& key,
arangodb::Result Get(rocksdb::ColumnFamilyHandle*, rocksdb::Slice const& key,
std::string* val) override;

arangodb::Result Put(
Expand All @@ -145,13 +149,13 @@ class RocksDBTrxMethods : public RocksDBMethods {
};

/// wraps a writebatch - non transactional
class RocksDBBatchedMethods : public RocksDBMethods {
class RocksDBBatchedMethods final : public RocksDBMethods {
public:
RocksDBBatchedMethods(RocksDBTransactionState*,
rocksdb::WriteBatchWithIndex*);

bool Exists(rocksdb::ColumnFamilyHandle*, RocksDBKey const&) override;
arangodb::Result Get(rocksdb::ColumnFamilyHandle*, RocksDBKey const& key,
arangodb::Result Get(rocksdb::ColumnFamilyHandle*, rocksdb::Slice const& key,
std::string* val) override;
arangodb::Result Put(
rocksdb::ColumnFamilyHandle*, RocksDBKey const& key,
Expand Down
70 changes: 54 additions & 16 deletions arangod/RocksDBEngine/RocksDBVPackIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,55 @@ static std::vector<arangodb::basics::AttributeName> const KeyAttribute{
// lists: lexicographically and within each slot according to these rules.
// ...........................................................................

/// Iterator for point lookups in a unique vpack index.
/// Stores the index, its comparator and the (moved-in) key bounds, and starts
/// in the "not yet looked up" state (`_done == false`).
/// The index is expected to live in the vpack column family.
RocksDBVPackUniqueIndexIterator::RocksDBVPackUniqueIndexIterator(
LogicalCollection* collection, transaction::Methods* trx,
ManagedDocumentResult* mmdr, arangodb::RocksDBVPackIndex const* index,
RocksDBKeyBounds&& bounds)
: IndexIterator(collection, trx, mmdr, index),
_index(index),
_cmp(index->comparator()),
_bounds(std::move(bounds)),
_done(false) {
TRI_ASSERT(index->columnFamily() == RocksDBColumnFamily::vpack());
}

/// @brief Reset the cursor so that the next call to next() performs the
/// lookup again
void RocksDBVPackUniqueIndexIterator::reset() {
  TRI_ASSERT(_trx->state()->isRunning());
  // forget that a lookup has already happened
  _done = false;
}

/// @brief Performs the single point lookup of this iterator.
/// Invokes `cb` at most once (only if the Get on the start key succeeds) and
/// always returns false, as there is at most one element to produce.
bool RocksDBVPackUniqueIndexIterator::next(TokenCallback const& cb, size_t limit) {
  TRI_ASSERT(_trx->state()->isRunning());

  bool const mayLookup = !_done && limit != 0;
  if (!mayLookup) {
    // nothing was requested, or the lookup has been performed already
    return false;
  }

  // remember that the lookup took place, regardless of its outcome
  _done = true;

  auto found = RocksDBValue::Empty(RocksDBEntryType::PrimaryIndexValue);
  RocksDBMethods* methods = RocksDBTransactionState::toMethods(_trx);
  bool const hit =
      methods->Get(_index->columnFamily(), _bounds.start(), found.buffer()).ok();

  if (hit) {
    cb(RocksDBToken(RocksDBValue::revisionId(*found.buffer())));
  }

  // a unique index yields at most one element, so there is never more to fetch
  return false;
}

RocksDBVPackIndexIterator::RocksDBVPackIndexIterator(
LogicalCollection* collection, transaction::Methods* trx,
ManagedDocumentResult* mmdr, arangodb::RocksDBVPackIndex const* index,
bool reverse, bool singleElementFetch, RocksDBKeyBounds&& bounds)
bool reverse, RocksDBKeyBounds&& bounds)
: IndexIterator(collection, trx, mmdr, index),
_index(index),
_cmp(index->comparator()),
_reverse(reverse),
_singleElementFetch(singleElementFetch),
_bounds(std::move(bounds)) {
TRI_ASSERT(index->columnFamily() == RocksDBColumnFamily::vpack());

Expand Down Expand Up @@ -147,13 +187,6 @@ bool RocksDBVPackIndexIterator::next(TokenCallback const& cb, size_t limit) {
: RocksDBKey::revisionId(_bounds.type(), _iterator->key());
cb(RocksDBToken(revisionId));

if (_singleElementFetch) {
// we only need to fetch a single element from the index and are done then
// this is a useful optimization because seeking forwards or backwards
// with the iterator can be very expensive
return false;
}

--limit;
if (_reverse) {
_iterator->Prev();
Expand Down Expand Up @@ -610,14 +643,13 @@ Result RocksDBVPackIndex::removeInternal(transaction::Methods* trx,
/// @brief attempts to locate an entry in the index
/// Warning: whoever calls this function is responsible for destroying
/// the returned IndexIterator*
RocksDBVPackIndexIterator* RocksDBVPackIndex::lookup(
IndexIterator* RocksDBVPackIndex::lookup(
transaction::Methods* trx, ManagedDocumentResult* mmdr,
VPackSlice const searchValues, bool reverse) const {
TRI_ASSERT(searchValues.isArray());
TRI_ASSERT(searchValues.length() <= _fields.size());

VPackBuilder leftSearch;
VPackBuilder rightSearch;

VPackSlice lastNonEq;
leftSearch.openArray();
Expand All @@ -630,9 +662,18 @@ RocksDBVPackIndexIterator* RocksDBVPackIndex::lookup(
}
leftSearch.add(eq);
}

if (lastNonEq.isNone() && _unique && searchValues.length() == _fields.size()) {
leftSearch.close();
RocksDBKeyBounds bounds = RocksDBKeyBounds::UniqueVPackIndex(_objectId, leftSearch.slice());

return new RocksDBVPackUniqueIndexIterator(_collection, trx, mmdr, this, std::move(bounds));
}

VPackSlice leftBorder;
VPackSlice rightBorder;

VPackBuilder rightSearch;

if (lastNonEq.isNone()) {
// We only have equality!
Expand Down Expand Up @@ -703,15 +744,12 @@ RocksDBVPackIndexIterator* RocksDBVPackIndex::lookup(
}
}

bool const singleElementFetch = (_unique && lastNonEq.isNone() &&
searchValues.length() == _fields.size());

RocksDBKeyBounds bounds = _unique ? RocksDBKeyBounds::UniqueVPackIndex(
_objectId, leftBorder, rightBorder)
: RocksDBKeyBounds::VPackIndex(
_objectId, leftBorder, rightBorder);
return new RocksDBVPackIndexIterator(_collection, trx, mmdr, this, reverse,
singleElementFetch, std::move(bounds));

return new RocksDBVPackIndexIterator(_collection, trx, mmdr, this, reverse, std::move(bounds));
}

bool RocksDBVPackIndex::accessFitsIndex(
Expand Down
Loading

0 comments on commit 5c5b5e5

Please sign in to comment.