Skip to content

Commit

Permalink
Integrate blob file writing with the flush logic (#7345)
Browse files Browse the repository at this point in the history
Summary:
The patch adds support for writing blob files during flush by integrating
`BlobFileBuilder` with the flush logic, most importantly with `BuildTable` and
`CompactionIterator`. If `enable_blob_files` is set, large values are extracted
to blob files and replaced with references. The resulting blob files are then
logged to the MANIFEST as part of the flush job's `VersionEdit` and
added to the `Version`, similarly to table files. Errors related to writing
blob files fail the flush, and any blob files written by such jobs are immediately
deleted (again, similarly to how SST files are handled). In addition, the patch
extends the logging and statistics around flushes to account for the presence
of blob files (e.g. `InternalStats::CompactionStats::bytes_written`, which is
used for calculating write amplification, now considers the blob files as well).

Pull Request resolved: #7345

Test Plan: Tested using `make check` and `db_bench`.

Reviewed By: riversand963

Differential Revision: D23506369

Pulled By: ltamasi

fbshipit-source-id: 646885f22dfbe063f650d38a1fedc132f499a159
  • Loading branch information
ltamasi authored and facebook-github-bot committed Sep 15, 2020
1 parent d4993b9 commit b0e7834
Show file tree
Hide file tree
Showing 18 changed files with 574 additions and 116 deletions.
18 changes: 15 additions & 3 deletions db/blob/blob_file_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,13 @@ BlobFileBuilder::BlobFileBuilder(
int job_id, uint32_t column_family_id,
const std::string& column_family_name, Env::IOPriority io_priority,
Env::WriteLifeTimeHint write_hint,
std::vector<std::string>* blob_file_paths,
std::vector<BlobFileAddition>* blob_file_additions)
: BlobFileBuilder([versions]() { return versions->NewFileNumber(); }, env,
fs, immutable_cf_options, mutable_cf_options,
file_options, job_id, column_family_id,
column_family_name, io_priority, write_hint,
blob_file_additions) {}
blob_file_paths, blob_file_additions) {}

BlobFileBuilder::BlobFileBuilder(
std::function<uint64_t()> file_number_generator, Env* env, FileSystem* fs,
Expand All @@ -45,6 +46,7 @@ BlobFileBuilder::BlobFileBuilder(
int job_id, uint32_t column_family_id,
const std::string& column_family_name, Env::IOPriority io_priority,
Env::WriteLifeTimeHint write_hint,
std::vector<std::string>* blob_file_paths,
std::vector<BlobFileAddition>* blob_file_additions)
: file_number_generator_(std::move(file_number_generator)),
env_(env),
Expand All @@ -59,6 +61,7 @@ BlobFileBuilder::BlobFileBuilder(
column_family_name_(column_family_name),
io_priority_(io_priority),
write_hint_(write_hint),
blob_file_paths_(blob_file_paths),
blob_file_additions_(blob_file_additions),
blob_count_(0),
blob_bytes_(0) {
Expand All @@ -67,7 +70,10 @@ BlobFileBuilder::BlobFileBuilder(
assert(fs_);
assert(immutable_cf_options_);
assert(file_options_);
assert(blob_file_paths_);
assert(blob_file_paths_->empty());
assert(blob_file_additions_);
assert(blob_file_additions_->empty());
}

// Destructor is defined out of line in this .cc file and explicitly defaulted.
// NOTE(review): presumably this is so the std::unique_ptr<BlobLogWriter>
// member is destroyed where BlobLogWriter is a complete type; confirm against
// the header's includes/forward declarations.
BlobFileBuilder::~BlobFileBuilder() = default;
Expand Down Expand Up @@ -145,7 +151,7 @@ Status BlobFileBuilder::OpenBlobFileIfNeeded() {

assert(immutable_cf_options_);
assert(!immutable_cf_options_->cf_paths.empty());
const std::string blob_file_path = BlobFileName(
std::string blob_file_path = BlobFileName(
immutable_cf_options_->cf_paths.front().path, blob_file_number);

std::unique_ptr<FSWritableFile> file;
Expand All @@ -161,14 +167,20 @@ Status BlobFileBuilder::OpenBlobFileIfNeeded() {
}
}

// Note: files get added to blob_file_paths_ right after the open, so they
// can be cleaned up upon failure. Contrast this with blob_file_additions_,
// which only contains successfully written files.
assert(blob_file_paths_);
blob_file_paths_->emplace_back(std::move(blob_file_path));

assert(file);
file->SetIOPriority(io_priority_);
file->SetWriteLifeTimeHint(write_hint_);

Statistics* const statistics = immutable_cf_options_->statistics;

std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
std::move(file), blob_file_path, *file_options_, env_,
std::move(file), blob_file_paths_->back(), *file_options_, env_,
nullptr /*IOTracer*/, statistics, immutable_cf_options_->listeners,
immutable_cf_options_->file_checksum_gen_factory));

Expand Down
3 changes: 3 additions & 0 deletions db/blob/blob_file_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class BlobFileBuilder {
const std::string& column_family_name,
Env::IOPriority io_priority,
Env::WriteLifeTimeHint write_hint,
std::vector<std::string>* blob_file_paths,
std::vector<BlobFileAddition>* blob_file_additions);

BlobFileBuilder(std::function<uint64_t()> file_number_generator, Env* env,
Expand All @@ -47,6 +48,7 @@ class BlobFileBuilder {
const std::string& column_family_name,
Env::IOPriority io_priority,
Env::WriteLifeTimeHint write_hint,
std::vector<std::string>* blob_file_paths,
std::vector<BlobFileAddition>* blob_file_additions);

BlobFileBuilder(const BlobFileBuilder&) = delete;
Expand Down Expand Up @@ -79,6 +81,7 @@ class BlobFileBuilder {
std::string column_family_name_;
Env::IOPriority io_priority_;
Env::WriteLifeTimeHint write_hint_;
std::vector<std::string>* blob_file_paths_;
std::vector<BlobFileAddition>* blob_file_additions_;
std::unique_ptr<BlobLogWriter> writer_;
uint64_t blob_count_;
Expand Down
Loading

0 comments on commit b0e7834

Please sign in to comment.