New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enable a multi-level db to smoothly migrate to FIFO via DB::Open #10348
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -121,20 +121,45 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction( | |
return c; | ||
} | ||
|
||
// The size-based compaction picker for FIFO. | ||
// | ||
// When the entire column family size exceeds max_table_files_size, FIFO will | ||
// try to delete the oldest sst file(s) until the resulting column family size | ||
// is smaller than max_table_files_size. | ||
// | ||
// This function also takes care of the case where a DB is migrating from level / | ||
// universal compaction to FIFO compaction. During the migration, the column | ||
// family will also have non-L0 files while FIFO can only create L0 files. | ||
// In this case, this function will first purge the sst files in the bottom- | ||
// most non-empty level, and the DB will eventually converge to the | ||
// regular FIFO case where there're only L0 files. Note that during the | ||
// migration case, the purge order will only be an approximation of "FIFO" | ||
// as entries inside lower-level files might sometimes be newer than some | ||
// entries inside upper-level files. | ||
Compaction* FIFOCompactionPicker::PickSizeCompaction( | ||
const std::string& cf_name, const MutableCFOptions& mutable_cf_options, | ||
const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, | ||
LogBuffer* log_buffer) { | ||
const int kLevel0 = 0; | ||
const std::vector<FileMetaData*>& level_files = vstorage->LevelFiles(kLevel0); | ||
uint64_t total_size = GetTotalFilesSize(level_files); | ||
// compute the total size and identify the last non-empty level | ||
int last_level = 0; | ||
uint64_t total_size = 0; | ||
for (int level = 0; level < vstorage->num_levels(); ++level) { | ||
auto level_size = GetTotalFilesSize(vstorage->LevelFiles(level)); | ||
total_size += level_size; | ||
if (level_size > 0) { | ||
last_level = level; | ||
} | ||
} | ||
const std::vector<FileMetaData*>& last_level_files = | ||
vstorage->LevelFiles(last_level); | ||
|
||
if (total_size <= | ||
mutable_cf_options.compaction_options_fifo.max_table_files_size || | ||
level_files.size() == 0) { | ||
// total size not exceeded | ||
if (last_level == 0 && | ||
total_size <= | ||
mutable_cf_options.compaction_options_fifo.max_table_files_size) { | ||
// total size not exceeded, try to find intra level 0 compaction if enabled | ||
const std::vector<FileMetaData*>& level0_files = vstorage->LevelFiles(0); | ||
if (mutable_cf_options.compaction_options_fifo.allow_compaction && | ||
level_files.size() > 0) { | ||
level0_files.size() > 0) { | ||
CompactionInputFiles comp_inputs; | ||
// try to prevent same files from being compacted multiple times, which | ||
// could produce large files that may never TTL-expire. Achieve this by | ||
|
@@ -146,7 +171,7 @@ Compaction* FIFOCompactionPicker::PickSizeCompaction( | |
static_cast<uint64_t>(mutable_cf_options.write_buffer_size), | ||
1.1)); | ||
if (FindIntraL0Compaction( | ||
level_files, | ||
level0_files, | ||
mutable_cf_options | ||
.level0_file_num_compaction_trigger /* min_files_to_compact */ | ||
, | ||
|
@@ -187,27 +212,58 @@ Compaction* FIFOCompactionPicker::PickSizeCompaction( | |
|
||
std::vector<CompactionInputFiles> inputs; | ||
inputs.emplace_back(); | ||
inputs[0].level = 0; | ||
inputs[0].level = last_level; | ||
|
||
for (auto ritr = level_files.rbegin(); ritr != level_files.rend(); ++ritr) { | ||
auto f = *ritr; | ||
total_size -= f->fd.file_size; | ||
inputs[0].files.push_back(f); | ||
char tmp_fsize[16]; | ||
AppendHumanBytes(f->fd.GetFileSize(), tmp_fsize, sizeof(tmp_fsize)); | ||
ROCKS_LOG_BUFFER(log_buffer, | ||
"[%s] FIFO compaction: picking file %" PRIu64 | ||
" with size %s for deletion", | ||
cf_name.c_str(), f->fd.GetNumber(), tmp_fsize); | ||
if (total_size <= | ||
mutable_cf_options.compaction_options_fifo.max_table_files_size) { | ||
break; | ||
if (last_level == 0) { | ||
// In L0, right-most files are the oldest files. | ||
for (auto ritr = last_level_files.rbegin(); ritr != last_level_files.rend(); | ||
++ritr) { | ||
auto f = *ritr; | ||
total_size -= f->fd.file_size; | ||
inputs[0].files.push_back(f); | ||
char tmp_fsize[16]; | ||
AppendHumanBytes(f->fd.GetFileSize(), tmp_fsize, sizeof(tmp_fsize)); | ||
ROCKS_LOG_BUFFER(log_buffer, | ||
"[%s] FIFO compaction: picking file %" PRIu64 | ||
" with size %s for deletion", | ||
cf_name.c_str(), f->fd.GetNumber(), tmp_fsize); | ||
if (total_size <= | ||
mutable_cf_options.compaction_options_fifo.max_table_files_size) { | ||
break; | ||
} | ||
} | ||
} else { | ||
// If the last level is non-L0, we actually don't know which file is | ||
// logically the oldest since the file creation time only represents | ||
// when this file was compacted to this level, which is independent | ||
// of when the entries in this file were first inserted. | ||
// | ||
// As a result, we delete files from the left instead. This means the sst | ||
// file with the smallest key will be deleted first. This design decision | ||
// better serves a major type of FIFO use cases where smaller keys are | ||
// associated with older data. | ||
for (const auto& f : last_level_files) { | ||
total_size -= f->fd.file_size; | ||
inputs[0].files.push_back(f); | ||
char tmp_fsize[16]; | ||
AppendHumanBytes(f->fd.GetFileSize(), tmp_fsize, sizeof(tmp_fsize)); | ||
ROCKS_LOG_BUFFER(log_buffer, | ||
"[%s] FIFO compaction: picking file %" PRIu64 | ||
" with size %s for deletion", | ||
cf_name.c_str(), f->fd.GetNumber(), tmp_fsize); | ||
if (total_size <= | ||
mutable_cf_options.compaction_options_fifo.max_table_files_size) { | ||
break; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There is a lot of common code in this if-else. I wonder whether there is a way to consolidate the logic. I can't think of a very easy way though. Perhaps define a lambda function and call it in two places? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A lambda sounds like a good idea. Originally I was trying to see whether I could define an iterator variable and initialize it based on whether it's forward or reverse, but I later found that iterator and reverse-iterator are not compatible in their types. |
||
} | ||
} | ||
|
||
Compaction* c = new Compaction( | ||
vstorage, ioptions_, mutable_cf_options, mutable_db_options, | ||
std::move(inputs), 0, 0, 0, 0, kNoCompression, | ||
std::move(inputs), last_level, | ||
/* target_file_size */ 0, | ||
/* max_compaction_bytes */ 0, | ||
/* output_path_id */ 0, kNoCompression, | ||
mutable_cf_options.compression_opts, Temperature::kUnknown, | ||
/* max_subcompactions */ 0, {}, /* is manual */ false, | ||
/* trim_ts */ "", vstorage->CompactionScore(0), | ||
|
@@ -224,6 +280,13 @@ Compaction* FIFOCompactionPicker::PickCompactionToWarm( | |
return nullptr; | ||
} | ||
|
||
// PickCompactionToWarm is only triggered if there are no non-L0 files. | ||
for (int level = 1; level < vstorage->num_levels(); ++level) { | ||
if (GetTotalFilesSize(vstorage->LevelFiles(level)) > 0) { | ||
return nullptr; | ||
} | ||
} | ||
|
||
const int kLevel0 = 0; | ||
const std::vector<FileMetaData*>& level_files = vstorage->LevelFiles(kLevel0); | ||
|
||
|
@@ -327,8 +390,6 @@ Compaction* FIFOCompactionPicker::PickCompaction( | |
const std::string& cf_name, const MutableCFOptions& mutable_cf_options, | ||
const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, | ||
LogBuffer* log_buffer, SequenceNumber /*earliest_memtable_seqno*/) { | ||
assert(vstorage->num_levels() == 1); | ||
|
||
Compaction* c = nullptr; | ||
if (mutable_cf_options.ttl > 0) { | ||
c = PickTTLCompaction(cf_name, mutable_cf_options, mutable_db_options, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The intra-level0 compaction logic is untouched. It will still only do compaction for level 0.