Skip to content

Commit 0044a76

Browse files
anand1976facebook-github-bot
authored andcommitted
Make failure to load UDI when opening an SST a soft failure (#13921)
Summary: If user_defined_index_factory in BlockBasedTableOptions is configured and we try to open an SST file without the corresponding UDI (either during DB open or file ingestion), ignore a failure to load the UDI by default. If fail_if_no_udi_on_open in BlockBasedTableOptions is true, then treat it as a fatal error. Pull Request resolved: #13921 Test Plan: Update unit tests Reviewed By: xingbowang Differential Revision: D81826054 Pulled By: anand1976 fbshipit-source-id: f4fe0b13ccb02b9448622af487680131e349c52b
1 parent a805c9b commit 0044a76

File tree

8 files changed

+150
-19
lines changed

8 files changed

+150
-19
lines changed

include/rocksdb/statistics.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,9 @@ enum Tickers : uint32_t {
542542
// TransactionOptions::large_txn_commit_optimize_threshold.
543543
NUMBER_WBWI_INGEST,
544544

545+
// Failure to load the UDI during SST table open
546+
SST_USER_DEFINED_INDEX_LOAD_FAIL_COUNT,
547+
545548
TICKER_ENUM_MAX
546549
};
547550

include/rocksdb/table.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,12 @@ struct BlockBasedTableOptions {
509509
// (CompressionOptions::parallel_threads sanitized to 1).
510510
std::shared_ptr<UserDefinedIndexFactory> user_defined_index_factory = nullptr;
511511

512+
// EXPERIMENTAL
513+
//
514+
// Return an error Status if a user_defined_index_factory is configured,
515+
// but there's no corresponding UDI block in the SST file being opened.
516+
bool fail_if_no_udi_on_open = false;
517+
512518
// If true, place whole keys in the filter (not just prefixes).
513519
// This must generally be true for gets to be efficient.
514520
bool whole_key_filtering = true;

monitoring/statistics.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,8 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
273273
{FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT,
274274
"rocksdb.file.read.corruption.retry.success.count"},
275275
{NUMBER_WBWI_INGEST, "rocksdb.number.wbwi.ingest"},
276+
{SST_USER_DEFINED_INDEX_LOAD_FAIL_COUNT,
277+
"rocksdb.sst.user.defined.index.load.fail.count"},
276278
};
277279

278280
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {

options/options_settable_test.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,8 @@ TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) {
203203
"max_auto_readahead_size=0;"
204204
"prepopulate_block_cache=kDisable;"
205205
"initial_auto_readahead_size=0;"
206-
"num_file_reads_for_auto_readahead=0",
206+
"num_file_reads_for_auto_readahead=0;"
207+
"fail_if_no_udi_on_open=true",
207208
new_bbto));
208209

209210
ASSERT_EQ(unset_bytes_base,

table/block_based/block_based_table_factory.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,9 @@ static struct BlockBasedTableTypeInfo {
399399
{offsetof(struct BlockBasedTableOptions,
400400
num_file_reads_for_auto_readahead),
401401
OptionType::kUInt64T, OptionVerificationType::kNormal}},
402+
{"fail_if_no_udi_on_open",
403+
{offsetof(struct BlockBasedTableOptions, fail_if_no_udi_on_open),
404+
OptionType::kBoolean, OptionVerificationType::kNormal}},
402405
};
403406
}
404407
} block_based_table_type_info;
@@ -874,6 +877,14 @@ std::string BlockBasedTableFactory::GetPrintableOptions() const {
874877
? "nullptr"
875878
: table_options_.filter_policy->Name());
876879
ret.append(buffer);
880+
snprintf(buffer, kBufferSize, " user_defined_index_factory: %s\n",
881+
table_options_.user_defined_index_factory == nullptr
882+
? "nullptr"
883+
: table_options_.user_defined_index_factory->Name());
884+
ret.append(buffer);
885+
snprintf(buffer, kBufferSize, " fail_if_no_udi_on_open: %d\n",
886+
table_options_.fail_if_no_udi_on_open);
887+
ret.append(buffer);
877888
snprintf(buffer, kBufferSize, " whole_key_filtering: %d\n",
878889
table_options_.whole_key_filtering);
879890
ret.append(buffer);

table/block_based/block_based_table_reader.cc

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1333,33 +1333,62 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
13331333
s = FindMetaBlock(meta_iter, kUserDefinedIndexPrefix + udi_name,
13341334
&udi_block_handle);
13351335
if (!s.ok()) {
1336-
return s;
1337-
}
1338-
// Read the block, and allocate on heap or pin in cache. The UDI block is
1339-
// not compressed. RetrieveBlock will verify the checksum.
1340-
s = RetrieveBlock(prefetch_buffer, ro, udi_block_handle,
1341-
rep_->decompressor.get(), &rep_->udi_block,
1342-
/*get_context=*/nullptr, lookup_context,
1343-
/*for_compaction=*/false, use_cache, /*async_read=*/false,
1344-
/*use_block_cache_for_lookup=*/false);
1345-
if (!s.ok()) {
1346-
return s;
1336+
RecordTick(rep_->ioptions.statistics.get(),
1337+
SST_USER_DEFINED_INDEX_LOAD_FAIL_COUNT);
1338+
if (table_options.fail_if_no_udi_on_open) {
1339+
ROCKS_LOG_ERROR(rep_->ioptions.logger,
1340+
"Failed to find the the UDI block %s in file %s; %s",
1341+
udi_name.c_str(), rep_->file->file_name().c_str(),
1342+
s.ToString().c_str());
1343+
// MAke the status more informative
1344+
s = Status::Corruption(s.ToString(), rep_->file->file_name());
1345+
return s;
1346+
} else {
1347+
// Emit a warning, but ignore the error status
1348+
ROCKS_LOG_WARN(rep_->ioptions.logger,
1349+
"Failed to find the the UDI block %s in file %s; %s",
1350+
udi_name.c_str(), rep_->file->file_name().c_str(),
1351+
s.ToString().c_str());
1352+
s = Status::OK();
1353+
}
13471354
}
1348-
assert(!rep_->udi_block.IsEmpty());
13491355

1350-
std::unique_ptr<UserDefinedIndexReader> udi_reader =
1351-
table_options.user_defined_index_factory->NewReader(
1352-
rep_->udi_block.GetValue()->data);
1353-
index_reader = std::make_unique<UserDefinedIndexReaderWrapper>(
1354-
udi_name, std::move(index_reader), std::move(udi_reader));
1356+
// If the UDI block size is 0, that means there's effectively no user
1357+
// defined index. In that case, skip setting up the reader.
1358+
if (udi_block_handle.size() > 0) {
1359+
// Read the block, and allocate on heap or pin in cache. The UDI block is
1360+
// not compressed. RetrieveBlock will verify the checksum.
1361+
if (s.ok()) {
1362+
s = RetrieveBlock(prefetch_buffer, ro, udi_block_handle,
1363+
rep_->decompressor.get(), &rep_->udi_block,
1364+
/*get_context=*/nullptr, lookup_context,
1365+
/*for_compaction=*/false, use_cache,
1366+
/*async_read=*/false,
1367+
/*use_block_cache_for_lookup=*/false);
1368+
}
1369+
if (s.ok()) {
1370+
assert(!rep_->udi_block.IsEmpty());
1371+
1372+
std::unique_ptr<UserDefinedIndexReader> udi_reader =
1373+
table_options.user_defined_index_factory->NewReader(
1374+
rep_->udi_block.GetValue()->data);
1375+
if (udi_reader) {
1376+
index_reader = std::make_unique<UserDefinedIndexReaderWrapper>(
1377+
udi_name, std::move(index_reader), std::move(udi_reader));
1378+
} else {
1379+
s = Status::Corruption("Failed to create UDI reader for " + udi_name +
1380+
" in file " + rep_->file->file_name());
1381+
}
1382+
}
1383+
}
13551384
}
13561385

13571386
rep_->index_reader = std::move(index_reader);
13581387

13591388
// The partitions of partitioned index are always stored in cache. They
13601389
// are hence follow the configuration for pin and prefetch regardless of
13611390
// the value of cache_index_and_filter_blocks
1362-
if (prefetch_all || pin_partition) {
1391+
if (s.ok() && (prefetch_all || pin_partition)) {
13631392
s = rep_->index_reader->CacheDependencies(ro, pin_partition,
13641393
prefetch_buffer);
13651394
}

table/table_test.cc

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7480,6 +7480,9 @@ class UserDefinedIndexTest : public BlockBasedTableTestBase {
74807480
const Slice* first_key_in_next_block,
74817481
const BlockHandle& block_handle,
74827482
std::string* separator_scratch) override {
7483+
if (keys_added_ == 0) {
7484+
return last_key_in_current_block;
7485+
}
74837486
EXPECT_EQ(last_key_in_current_block.size(), 5);
74847487
if (first_key_in_next_block) {
74857488
EXPECT_EQ(first_key_in_next_block->size(), 5);
@@ -7500,12 +7503,19 @@ class UserDefinedIndexTest : public BlockBasedTableTestBase {
75007503

75017504
void OnKeyAdded(const Slice& key, ValueType /*value*/,
75027505
const Slice& /*value*/) override {
7506+
if (key.starts_with("dummy")) {
7507+
return;
7508+
}
75037509
EXPECT_EQ(key.size(), 5);
75047510
// Track keys added to the index
75057511
keys_added_++;
75067512
}
75077513

75087514
Status Finish(Slice* index_contents) override {
7515+
if (entries_added_ == 0) {
7516+
*index_contents = Slice();
7517+
return Status::OK();
7518+
}
75097519
// Serialize the index data
75107520
std::string result;
75117521
for (const auto& entry : index_data_) {
@@ -8020,6 +8030,7 @@ TEST_F(UserDefinedIndexTest, IngestTest) {
80208030

80218031
// Verify that external file ingestion fails if we try to ingest an SST file
80228032
// without the UDI and a UDI factory is configured in BlockBasedTableOptions
8033+
// and fail_if_no_udi_on_open is true in BlockBasedTableOptions.
80238034
TEST_F(UserDefinedIndexTest, IngestFailTest) {
80248035
Options options;
80258036
BlockBasedTableOptions table_options;
@@ -8051,6 +8062,7 @@ TEST_F(UserDefinedIndexTest, IngestFailTest) {
80518062
auto user_defined_index_factory =
80528063
std::make_shared<TestUserDefinedIndexFactory>();
80538064
table_options.user_defined_index_factory = user_defined_index_factory;
8065+
table_options.fail_if_no_udi_on_open = true;
80548066
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
80558067

80568068
std::unique_ptr<DB> db;
@@ -8065,6 +8077,72 @@ TEST_F(UserDefinedIndexTest, IngestFailTest) {
80658077
s = db->IngestExternalFile(cfh, {ingest_file}, ifo);
80668078
ASSERT_NOK(s);
80678079

8080+
ASSERT_OK(db->SetOptions(
8081+
cfh, {{"block_based_table_factory", "{fail_if_no_udi_on_open=false;}"}}));
8082+
s = db->IngestExternalFile(cfh, {ingest_file}, ifo);
8083+
ASSERT_OK(s);
8084+
8085+
ASSERT_OK(db->DestroyColumnFamilyHandle(cfh));
8086+
ASSERT_OK(db->Close());
8087+
ASSERT_OK(DestroyDB(dbname, options));
8088+
}
8089+
8090+
TEST_F(UserDefinedIndexTest, IngestEmptyUDI) {
8091+
Options options;
8092+
BlockBasedTableOptions table_options;
8093+
std::string dbname = test::PerThreadDBPath("user_defined_index_test");
8094+
std::string ingest_file = dbname + "test.sst";
8095+
std::string ingest_file2 = dbname + "dummy.sst";
8096+
8097+
// Set up the user-defined index factory
8098+
auto user_defined_index_factory =
8099+
std::make_shared<TestUserDefinedIndexFactory>();
8100+
table_options.user_defined_index_factory = user_defined_index_factory;
8101+
// Set up custom flush block policy that flushes every 3 keys
8102+
table_options.flush_block_policy_factory =
8103+
std::make_shared<CustomFlushBlockPolicyFactory>();
8104+
8105+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
8106+
8107+
std::unique_ptr<SstFileWriter> writer;
8108+
writer.reset(new SstFileWriter(EnvOptions(), options));
8109+
ASSERT_OK(writer->Open(ingest_file));
8110+
8111+
// Add 100 keys instead of just 5
8112+
for (int i = 0; i < 100; i++) {
8113+
std::stringstream ss;
8114+
ss << std::setw(2) << std::setfill('0') << i;
8115+
std::string key = "key" + ss.str();
8116+
std::string value = "value" + ss.str();
8117+
ASSERT_OK(writer->Put(key, value));
8118+
}
8119+
ASSERT_OK(writer->Finish());
8120+
writer.reset();
8121+
writer.reset(new SstFileWriter(EnvOptions(), options));
8122+
ASSERT_OK(writer->Open(ingest_file2));
8123+
ASSERT_OK(writer->Put("dummy", "val"));
8124+
ASSERT_OK(writer->Finish());
8125+
writer.reset();
8126+
8127+
table_options.fail_if_no_udi_on_open = true;
8128+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
8129+
8130+
std::unique_ptr<DB> db;
8131+
options.create_if_missing = true;
8132+
Status s = DB::Open(options, dbname, &db);
8133+
ASSERT_OK(s);
8134+
ASSERT_TRUE(db != nullptr);
8135+
ColumnFamilyHandle* cfh = nullptr;
8136+
ASSERT_OK(db->CreateColumnFamily(options, "new_cf", &cfh));
8137+
8138+
std::vector<IngestExternalFileArg> ifa;
8139+
ifa.emplace_back();
8140+
ifa[0].column_family = cfh;
8141+
ifa[0].external_files.emplace_back(ingest_file);
8142+
ifa[0].external_files.emplace_back(ingest_file2);
8143+
s = db->IngestExternalFiles(ifa);
8144+
ASSERT_OK(s);
8145+
80688146
ASSERT_OK(db->DestroyColumnFamilyHandle(cfh));
80698147
ASSERT_OK(db->Close());
80708148
ASSERT_OK(DestroyDB(dbname, options));
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add the fail_if_no_udi_on_open flag in BlockBasedTableOption to control whether a missing user defined index block in a SST is a hard error or not.

0 commit comments

Comments
 (0)