Skip to content

Commit

Permalink
Fix a few bugs in best-efforts recovery (#6824)
Browse files Browse the repository at this point in the history
Summary:
1. Update column_family_memtables_ to point to latest column_family_set in
   version_set after recovery.
2. Normalize file paths passed by application so that directories end with '/'
   or '\\'.
3. In addition to missing files, corrupted files are also ignored in
   best-efforts recovery.
Pull Request resolved: #6824

Test Plan: COMPILE_WITH_ASAN=1 make check

Reviewed By: anand1976

Differential Revision: D21463905

Pulled By: riversand963

fbshipit-source-id: c48db8843cc93c8c1c7139c474b64e6f775307d2
  • Loading branch information
riversand963 authored and facebook-github-bot committed May 8, 2020
1 parent 1c84660 commit e72e216
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 8 deletions.
2 changes: 2 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
* Fix consistency checking error swallowing in some cases when options.force_consistency_checks = true.
* Fix possible false NotFound status from batched MultiGet using index type kHashSearch.
* Fix corruption caused by enabling delete triggered compaction (NewCompactOnDeletionCollectorFactory) in universal compaction mode, along with parallel compactions. The bug can result in two parallel compactions picking the same input files, resulting in the DB resurrecting older and deleted versions of some keys.
* Fix a use-after-free bug in best-efforts recovery. column_family_memtables_ needs to point to valid ColumnFamilySet.
* Let best-efforts recovery ignore corrupted files during table loading.

### Public API Change
* Flush(..., column_family) may return Status::ColumnFamilyDropped() instead of Status::InvalidArgument() if column_family is dropped while processing the flush request.
Expand Down
35 changes: 33 additions & 2 deletions db/db_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1840,11 +1840,16 @@ TEST_F(DBBasicTest, RecoverWithMissingFiles) {
ASSERT_OK(Flush(static_cast<int>(cf)));
}

// Delete files
// Delete and corrupt files
for (size_t i = 0; i < all_cf_names.size(); ++i) {
std::vector<std::string>& files = listener->GetFiles(all_cf_names[i]);
ASSERT_EQ(3, files.size());
for (int j = static_cast<int>(files.size() - 1); j >= static_cast<int>(i);
std::string corrupted_data;
ASSERT_OK(ReadFileToString(env_, files[files.size() - 1], &corrupted_data));
ASSERT_OK(WriteStringToFile(
env_, corrupted_data.substr(0, corrupted_data.size() - 2),
files[files.size() - 1], /*should_sync=*/true));
for (int j = static_cast<int>(files.size() - 2); j >= static_cast<int>(i);
--j) {
ASSERT_OK(env_->DeleteFile(files[j]));
}
Expand Down Expand Up @@ -1876,6 +1881,32 @@ TEST_F(DBBasicTest, RecoverWithMissingFiles) {
}
}

TEST_F(DBBasicTest, BestEffortsRecoveryTryMultipleManifests) {
Options options = CurrentOptions();
options.env = env_;
DestroyAndReopen(options);
ASSERT_OK(Put("foo", "value0"));
ASSERT_OK(Flush());
Close();
{
// Hack by adding a new MANIFEST with high file number
std::string garbage(10, '\0');
ASSERT_OK(WriteStringToFile(env_, garbage, dbname_ + "/MANIFEST-001000",
/*should_sync=*/true));
}
{
// Hack by adding a corrupted SST not referenced by any MANIFEST
std::string garbage(10, '\0');
ASSERT_OK(WriteStringToFile(env_, garbage, dbname_ + "/001001.sst",
/*should_sync=*/true));
}

options.best_efforts_recovery = true;

Reopen(options);
ASSERT_OK(Put("bar", "value"));
}

TEST_F(DBBasicTest, RecoverWithNoCurrentFile) {
Options options = CurrentOptions();
options.env = env_;
Expand Down
11 changes: 7 additions & 4 deletions db/db_impl/db_impl_files.cc
Original file line number Diff line number Diff line change
Expand Up @@ -686,13 +686,15 @@ uint64_t PrecomputeMinLogNumberToKeep(
Status DBImpl::FinishBestEffortsRecovery() {
mutex_.AssertHeld();
std::vector<std::string> paths;
paths.push_back(dbname_);
paths.push_back(NormalizePath(dbname_ + std::string(1, kFilePathSeparator)));
for (const auto& db_path : immutable_db_options_.db_paths) {
paths.push_back(db_path.path);
paths.push_back(
NormalizePath(db_path.path + std::string(1, kFilePathSeparator)));
}
for (const auto* cfd : *versions_->GetColumnFamilySet()) {
for (const auto& cf_path : cfd->ioptions()->cf_paths) {
paths.push_back(cf_path.path);
paths.push_back(
NormalizePath(cf_path.path + std::string(1, kFilePathSeparator)));
}
}
// Dedup paths
Expand All @@ -711,7 +713,8 @@ Status DBImpl::FinishBestEffortsRecovery() {
if (!ParseFileName(fname, &number, &type)) {
continue;
}
const std::string normalized_fpath = NormalizePath(path + fname);
// path ends with '/' or '\\'
const std::string normalized_fpath = path + fname;
largest_file_number = std::max(largest_file_number, number);
if (type == kTableFile && number >= next_file_number &&
files_to_delete.find(normalized_fpath) == files_to_delete.end()) {
Expand Down
3 changes: 3 additions & 0 deletions db/db_impl/db_impl_open.cc
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,9 @@ Status DBImpl::Recover(
s = versions_->TryRecover(column_families, read_only, &db_id_,
&missing_table_file);
if (s.ok()) {
// TryRecover may delete previous column_family_set_.
column_family_memtables_.reset(
new ColumnFamilyMemTablesImpl(versions_->GetColumnFamilySet()));
s = FinishBestEffortsRecovery();
}
}
Expand Down
5 changes: 3 additions & 2 deletions db/version_edit_handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,8 @@ Status VersionEditHandler::LoadTables(ColumnFamilyData* cfd,
version_set_->db_options_->max_file_opening_threads,
prefetch_index_and_filter_in_cache, is_initial_load,
cfd->GetLatestMutableCFOptions()->prefix_extractor.get());
if (s.IsPathNotFound() && no_error_if_table_files_missing_) {
if ((s.IsPathNotFound() || s.IsCorruption()) &&
no_error_if_table_files_missing_) {
s = Status::OK();
}
if (!s.ok() && !version_set_->db_options_->paranoid_checks) {
Expand Down Expand Up @@ -546,7 +547,7 @@ Status VersionEditHandlerPointInTime::MaybeCreateVersion(
const std::string fpath =
MakeTableFileName(cfd->ioptions()->cf_paths[0].path, file_num);
s = version_set_->VerifyFileMetadata(fpath, meta);
if (s.IsPathNotFound() || s.IsNotFound()) {
if (s.IsPathNotFound() || s.IsNotFound() || s.IsCorruption()) {
missing_files.insert(file_num);
s = Status::OK();
}
Expand Down

0 comments on commit e72e216

Please sign in to comment.