Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache simulator: Optimize hybrid row-block cache. #5616

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 25 additions & 20 deletions utilities/simulator_cache/cache_simulator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -122,14 +122,26 @@ void HybridRowBlockCacheSimulator::Access(const BlockCacheTraceRecord& access) {
// TODO (haoyu): We only support Get for now. We need to extend the tracing
// for MultiGet, i.e., non-data block accesses must log all keys in a
// MultiGet.
bool is_cache_miss = false;
bool is_cache_miss = true;
bool admitted = false;
if (access.caller == TableReaderCaller::kUserGet &&
access.get_id != BlockCacheTraceHelper::kReservedGetId) {
// This is a Get/MultiGet request.
// This is a Get request.
const std::string& row_key = BlockCacheTraceHelper::ComputeRowKey(access);
if (getid_getkeys_map_[access.get_id].find(row_key) ==
getid_getkeys_map_[access.get_id].end()) {
GetRequestStatus& status = getid_status_map_[access.get_id];
if (status.is_complete) {
// This Get request completes.
// Skip future accesses to its index/filter/data
// blocks. These block lookups are unnecessary if we observe a hit for the
// referenced key-value pair already. Thus, we treat these lookups as
// hits. This is also to ensure the total number of accesses are the same
// when comparing to other policies.
miss_ratio_stats_.UpdateMetrics(access.access_timestamp,
/*is_user_access=*/true,
/*is_cache_miss=*/false);
return;
}
if (status.row_key_status.find(row_key) == status.row_key_status.end()) {
// This is the first time that this key is accessed. Look up the key-value
// pair first. Do not update the miss/accesses metrics here since it will
// be updated later.
Expand All @@ -144,37 +156,30 @@ void HybridRowBlockCacheSimulator::Access(const BlockCacheTraceRecord& access) {
} else if (admitted) {
result = InsertResult::ADMITTED;
}
getid_getkeys_map_[access.get_id][row_key] =
std::make_pair(is_cache_miss, result);
status.row_key_status[row_key] = result;
}
std::pair<bool, InsertResult> miss_inserted =
getid_getkeys_map_[access.get_id][row_key];
if (!miss_inserted.first) {
// This is a cache hit. Skip future accesses to its index/filter/data
// blocks. These block lookups are unnecessary if we observe a hit for the
// referenced key-value pair already. Thus, we treat these lookups as
// hits. This is also to ensure the total number of accesses are the same
// when comparing to other policies.
if (!is_cache_miss) {
// A cache hit.
status.is_complete = true;
miss_ratio_stats_.UpdateMetrics(access.access_timestamp,
/*is_user_access=*/true,
/*is_cache_miss=*/false);
return;
}
// The key-value pair observes a cache miss. We need to access its
// The row key-value pair observes a cache miss. We need to access its
// index/filter/data blocks.
InsertResult inserted = status.row_key_status[row_key];
AccessKVPair(
access.block_key, access.block_type, ComputeBlockPriority(access),
access.block_key, access.block_size, ComputeBlockPriority(access),
access,
/*no_insert=*/!insert_blocks_upon_row_kvpair_miss_ || access.no_insert,
/*is_user_access=*/true, &is_cache_miss, &admitted,
/*update_metrics=*/true);
if (access.referenced_data_size > 0 &&
miss_inserted.second == InsertResult::ADMITTED) {
if (access.referenced_data_size > 0 && inserted == InsertResult::ADMITTED) {
sim_cache_->Insert(row_key, /*value=*/nullptr,
access.referenced_data_size, /*deleter=*/nullptr,
/*handle=*/nullptr, Cache::Priority::HIGH);
getid_getkeys_map_[access.get_id][row_key] =
std::make_pair(true, InsertResult::INSERTED);
status.row_key_status[row_key] = InsertResult::INSERTED;
}
return;
}
Expand Down
19 changes: 14 additions & 5 deletions utilities/simulator_cache/cache_simulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class MissRatioStats {
return static_cast<double>(num_misses_ * 100.0 / num_accesses_);
}
// Returns the current value of the num_accesses_ counter.
uint64_t total_accesses() const { return num_accesses_; }
// Returns the current value of the num_misses_ counter.
uint64_t total_misses() const { return num_misses_; }

const std::map<uint64_t, uint64_t>& num_accesses_timeline() const {
return num_accesses_timeline_;
Expand All @@ -63,6 +64,7 @@ class MissRatioStats {
return static_cast<double>(user_misses_ * 100.0 / user_accesses_);
}
// Returns the current value of the user_accesses_ counter.
uint64_t user_accesses() const { return user_accesses_; }
// Returns the current value of the user_misses_ counter.
uint64_t user_misses() const { return user_misses_; }

void UpdateMetrics(uint64_t timestamp_in_ms, bool is_user_access,
bool is_cache_miss);
Expand Down Expand Up @@ -168,17 +170,24 @@ class HybridRowBlockCacheSimulator : public PrioritizedCacheSimulator {
NO_INSERT,
};

// A map stores get_id to a map of row keys. For each row key, it stores a
// boolean and an enum. The first bool is true when we observe a miss upon the
// first time we encounter the row key. The second arg is INSERTED when the
// We set is_complete to true when the referenced row-key of a get request
// hits the cache. If is_complete is true, we treat future accesses of this
// get request as hits.
//
// For each row key, it stores an enum. It is INSERTED when the
// kv-pair has been inserted into the cache, ADMITTED if it should be inserted
// but has not been yet, NO_INSERT if it should not be inserted.
//
// A kv-pair is in ADMITTED state when we encounter this kv-pair but do not
// know its size. This may happen if the first access on the referenced key is
// an index/filter block.
std::map<uint64_t, std::map<std::string, std::pair<bool, InsertResult>>>
getid_getkeys_map_;
// Per-Get bookkeeping kept by the hybrid row-block cache simulator.
struct GetRequestStatus {
  // Becomes true once a row key referenced by this Get request hits the
  // cache; later accesses that belong to the same Get are counted as hits.
  bool is_complete{false};
  // Insertion state (an InsertResult value) recorded for every row key this
  // Get request has referenced so far.
  std::map<std::string, InsertResult> row_key_status;
};

// Maps a get_id to the status of the corresponding Get request.
std::map<uint64_t, GetRequestStatus> getid_status_map_;
bool insert_blocks_upon_row_kvpair_miss_;
};

Expand Down
149 changes: 147 additions & 2 deletions utilities/simulator_cache/cache_simulator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ namespace rocksdb {
namespace {
const std::string kBlockKeyPrefix = "test-block-";
// Prefix of the referenced (row) keys built by the tests.
const std::string kRefKeyPrefix = "test-get-";
// Eight 'c' characters appended to every referenced key built by the tests.
// NOTE(review): presumably stands in for the 8-byte internal-key suffix that
// ExtractUserKey strips -- confirm.
const std::string kRefKeySequenceNumber = std::string(8, 'c');
const uint64_t kGetId = 1;
const uint64_t kGetBlockId = 100;
const uint64_t kCompactionBlockId = 1000;
Expand All @@ -38,12 +39,12 @@ class CacheSimulatorTest : public testing::Test {
record.cf_name = "test";
record.caller = TableReaderCaller::kUserGet;
record.level = 6;
record.sst_fd_number = kGetBlockId;
record.sst_fd_number = 0;
record.get_id = getid;
record.is_cache_hit = Boolean::kFalse;
record.no_insert = Boolean::kFalse;
record.referenced_key =
kRefKeyPrefix + std::to_string(kGetId) + std::string(8, 'c');
kRefKeyPrefix + std::to_string(kGetId) + kRefKeySequenceNumber;
record.referenced_key_exist_in_block = Boolean::kTrue;
record.referenced_data_size = 100;
record.num_keys_in_block = 300;
Expand All @@ -66,6 +67,29 @@ class CacheSimulatorTest : public testing::Test {
return record;
}

// Checks the state of a simulated cache after an access.
//
// sim_cache:             the cache to inspect.
// miss_ratio_stats:      the statistics whose total accesses/misses are
//                        compared against the expected counts.
// expected_usage:        expected value of sim_cache->GetUsage().
// expected_num_accesses: expected miss_ratio_stats.total_accesses().
// expected_num_misses:   expected miss_ratio_stats.total_misses().
// blocks:                block keys that must be found in sim_cache.
// keys:                  row-key ids; each one is expanded to
//                        kRefKeyPrefix + key + kRefKeySequenceNumber and
//                        looked up as "0_" + <user key> + "_0".
//
// All checks use non-fatal EXPECT_* assertions, so a failed lookup does not
// stop the function. A handle is therefore released only when the lookup
// actually returned one; a null handle is never passed to Release().
void AssertCache(std::shared_ptr<Cache> sim_cache,
                 const MissRatioStats& miss_ratio_stats,
                 uint64_t expected_usage, uint64_t expected_num_accesses,
                 uint64_t expected_num_misses,
                 std::vector<std::string> blocks,
                 std::vector<std::string> keys) {
  EXPECT_EQ(expected_usage, sim_cache->GetUsage());
  EXPECT_EQ(expected_num_accesses, miss_ratio_stats.total_accesses());
  EXPECT_EQ(expected_num_misses, miss_ratio_stats.total_misses());
  for (auto const& block : blocks) {
    auto handle = sim_cache->Lookup(block);
    EXPECT_NE(nullptr, handle);
    if (handle != nullptr) {
      sim_cache->Release(handle);
    }
  }
  for (auto const& key : keys) {
    std::string row_key = kRefKeyPrefix + key + kRefKeySequenceNumber;
    auto handle =
        sim_cache->Lookup("0_" + ExtractUserKey(row_key).ToString() + "_0");
    EXPECT_NE(nullptr, handle);
    if (handle != nullptr) {
      sim_cache->Release(handle);
    }
  }
}

Env* env_;
};

Expand Down Expand Up @@ -277,6 +301,127 @@ TEST_F(CacheSimulatorTest, HybridRowBlockCacheSimulator) {
}
}

// Drives a HybridRowBlockCacheSimulator (backed by an LRU cache created with
// capacity 16) through a sequence of Get accesses and, after each one, checks
// the cache usage, the total access/miss counts, and which blocks and row
// keys are present in the cache.
//
// AssertCache arguments, in order: expected usage, expected total accesses,
// expected total misses, block keys expected in the cache, row-key ids
// expected in the cache.
TEST_F(CacheSimulatorTest, HybridRowBlockCacheSimulatorGetTest) {
BlockCacheTraceRecord get = GenerateGetRecord(kGetId);
get.block_size = 1;
get.referenced_data_size = 0;
get.access_timestamp = 0;
get.block_key = "1";
get.get_id = 1;
get.get_from_user_specified_snapshot = Boolean::kFalse;
get.referenced_key =
kRefKeyPrefix + std::to_string(1) + kRefKeySequenceNumber;
get.no_insert = Boolean::kFalse;
get.sst_fd_number = 0;
get.get_from_user_specified_snapshot = Boolean::kFalse;

std::shared_ptr<Cache> sim_cache =
NewLRUCache(/*capacity=*/16, /*num_shard_bits=*/1,
/*strict_capacity_limit=*/false,
/*high_pri_pool_ratio=*/0);
std::unique_ptr<HybridRowBlockCacheSimulator> cache_simulator(
new HybridRowBlockCacheSimulator(
nullptr, sim_cache, /*insert_blocks_row_kvpair_misses=*/true));
// Expect a miss. The row key-value pair is not inserted yet because its size
// is unknown (referenced_data_size is 0); only block "1" is cached.
cache_simulator->Access(get);
AssertCache(sim_cache, cache_simulator->miss_ratio_stats(), 1, 1, 1, {"1"},
{});
// Same get, next block. The referenced data size is now known, so both
// block "2" and row key K1 are expected in the cache afterwards.
get.access_timestamp += 1;
get.referenced_data_size = 1;
get.block_key = "2";
cache_simulator->Access(get);
AssertCache(sim_cache, cache_simulator->miss_ratio_stats(), 3, 2, 2,
{"1", "2"}, {"1"});
get.access_timestamp += 1;
get.block_key = "3";
// K1 should not be inserted again: usage grows only by block "3".
cache_simulator->Access(get);
AssertCache(sim_cache, cache_simulator->miss_ratio_stats(), 4, 3, 3,
{"1", "2", "3"}, {"1"});

// A second get request referencing the same key. It is expected to count as
// an access but not as a miss, and block "4" is not expected in the cache.
get.access_timestamp += 1;
get.get_id = 2;
get.block_key = "4";
get.referenced_data_size = 0;
cache_simulator->Access(get);
AssertCache(sim_cache, cache_simulator->miss_ratio_stats(), 4, 4, 3,
{"1", "2", "3"}, {"1"});

// A third get request searches three files, three different keys.
// And the second key observes a hit.
get.access_timestamp += 1;
get.referenced_data_size = 1;
get.get_id = 3;
get.block_key = "3";
get.referenced_key = kRefKeyPrefix + "2" + kRefKeySequenceNumber;
// K2 should observe a miss. Block 3 observes a hit.
cache_simulator->Access(get);
AssertCache(sim_cache, cache_simulator->miss_ratio_stats(), 5, 5, 3,
{"1", "2", "3"}, {"1", "2"});

get.access_timestamp += 1;
get.referenced_data_size = 1;
get.get_id = 3;
get.block_key = "4";
get.referenced_data_size = 1;
get.referenced_key = kRefKeyPrefix + "1" + kRefKeySequenceNumber;
// K1 should observe a hit.
cache_simulator->Access(get);
AssertCache(sim_cache, cache_simulator->miss_ratio_stats(), 5, 6, 3,
{"1", "2", "3"}, {"1", "2"});

get.access_timestamp += 1;
get.referenced_data_size = 1;
get.get_id = 3;
get.block_key = "4";
get.referenced_data_size = 1;
get.referenced_key = kRefKeyPrefix + "3" + kRefKeySequenceNumber;
// K3 would observe a miss on its own.
// However, as the get is already complete, K3 should not be accessed any
// more: the access count grows but the miss count and usage do not.
cache_simulator->Access(get);
AssertCache(sim_cache, cache_simulator->miss_ratio_stats(), 5, 7, 3,
{"1", "2", "3"}, {"1", "2"});

// A fourth get request searches one file and two blocks. One row key.
get.access_timestamp += 1;
get.get_id = 4;
get.block_key = "5";
get.referenced_key = kRefKeyPrefix + "4" + kRefKeySequenceNumber;
get.referenced_data_size = 1;
cache_simulator->Access(get);
AssertCache(sim_cache, cache_simulator->miss_ratio_stats(), 7, 8, 4,
{"1", "2", "3", "5"}, {"1", "2", "4"});
// Look the row keys up directly by their cache key as well.
for (auto const& key : {"1", "2", "4"}) {
auto handle = sim_cache->Lookup("0_" + kRefKeyPrefix + key + "_0");
ASSERT_NE(nullptr, handle);
sim_cache->Release(handle);
}

// A bunch of insertions which evict cached row keys.
for (uint32_t i = 6; i < 100; i++) {
get.access_timestamp += 1;
get.get_id = 0;
get.block_key = std::to_string(i);
cache_simulator->Access(get);
}

get.get_id = 4;
// A different block.
get.block_key = "100";
// Same row key and should not be inserted again.
get.referenced_key = kRefKeyPrefix + "4" + kRefKeySequenceNumber;
get.referenced_data_size = 1;
cache_simulator->Access(get);
AssertCache(sim_cache, cache_simulator->miss_ratio_stats(), 16, 103, 99, {},
{});
// None of the previously cached row keys may be present any more.
for (auto const& key : {"1", "2", "4"}) {
auto handle = sim_cache->Lookup("0_" + kRefKeyPrefix + key + "_0");
ASSERT_EQ(nullptr, handle);
}
}

TEST_F(CacheSimulatorTest, HybridRowBlockNoInsertCacheSimulator) {
uint64_t block_id = 100;
BlockCacheTraceRecord first_get = GenerateGetRecord(kGetId);
Expand Down