Cache the mapping of sequence to log block index in transaction log iterator #12538
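The diff below threads a shared TransactionLogSeqCache through the transaction log iterator so a seek can start from a cached block index instead of scanning each WAL file from offset 0. The cache implementation itself is not part of this page; the following is a minimal sketch of the shape implied by the Lookup/Insert call sites in the diff. The class body, locking, and eviction policy here are assumptions, not the PR's actual code:

```cpp
#include <cstdint>
#include <map>
#include <mutex>
#include <utility>

// Hypothetical sketch only -- the PR's real TransactionLogSeqCache is not in
// this diff. The call sites below imply a mapping of
// (WAL log number, sequence number) -> block index inside that WAL file.
class TransactionLogSeqCache {
 public:
  // Remember that the batch starting at `seq` lives in block `block_index`
  // of the WAL file `log_number`.
  void Insert(uint64_t log_number, uint64_t seq, uint64_t block_index) {
    std::lock_guard<std::mutex> lock(mu_);
    entries_[{log_number, seq}] = block_index;
  }

  // Find the block of the closest cached sequence <= `seq` in `log_number`,
  // so the reader can start scanning at or before the target record.
  bool Lookup(uint64_t log_number, uint64_t seq, uint64_t* block_index) const {
    std::lock_guard<std::mutex> lock(mu_);
    auto it = entries_.upper_bound({log_number, seq});
    if (it == entries_.begin()) {
      return false;
    }
    --it;
    if (it->first.first != log_number) {
      return false;  // closest entry belongs to a different WAL file
    }
    *block_index = it->second;
    return true;
  }

 private:
  mutable std::mutex mu_;
  // Ordered map so upper_bound finds the closest preceding sequence per log.
  // A production cache would also bound its size (e.g. LRU eviction).
  std::map<std::pair<uint64_t, uint64_t>, uint64_t> entries_;
};
```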
@@ -43,7 +43,8 @@ Reader::Reader(std::shared_ptr<Logger> info_log,
       compression_type_record_read_(false),
       uncompress_(nullptr),
       hash_state_(nullptr),
-      uncompress_hash_state_(nullptr){}
+      uncompress_hash_state_(nullptr),
+      may_skip_first_fragmented_record_(false) {}

 Reader::~Reader() {
   delete[] backing_store_;
@@ -112,6 +113,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch,
         *record = fragment;
         last_record_offset_ = prospective_record_offset;
         first_record_read_ = true;
+        may_skip_first_fragmented_record_ = false;
         return true;

       case kFirstType:
@@ -130,13 +132,16 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch,
         prospective_record_offset = physical_record_offset;
         scratch->assign(fragment.data(), fragment.size());
         in_fragmented_record = true;
+        may_skip_first_fragmented_record_ = false;
         break;

       case kMiddleType:
       case kRecyclableMiddleType:
         if (!in_fragmented_record) {
-          ReportCorruption(fragment.size(),
-                           "missing start of fragmented record(1)");
+          if (!may_skip_first_fragmented_record_) {
+            ReportCorruption(fragment.size(),
+                             "missing start of fragmented record(1)");
+          }
         } else {
           if (record_checksum != nullptr) {
             XXH3_64bits_update(hash_state_, fragment.data(), fragment.size());
@@ -148,8 +153,11 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch,
       case kLastType:
       case kRecyclableLastType:
         if (!in_fragmented_record) {
-          ReportCorruption(fragment.size(),
-                           "missing start of fragmented record(2)");
+          if (!may_skip_first_fragmented_record_) {
+            ReportCorruption(fragment.size(),
+                             "missing start of fragmented record(2)");
+          }
+          may_skip_first_fragmented_record_ = false;
         } else {
           if (record_checksum != nullptr) {
             XXH3_64bits_update(hash_state_, fragment.data(), fragment.size());

Review comment on this hunk: Can also clear skipped_ here.
Author reply: done, thx for review
@@ -192,6 +200,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch,
               scratch->size(),
               "user-defined timestamp size record interspersed partial record");
         }
+        may_skip_first_fragmented_record_ = false;
         prospective_record_offset = physical_record_offset;
         scratch->clear();
         last_record_offset_ = prospective_record_offset;
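Taken together, the Reader changes above make the log reader tolerant of starting mid-file: after skipping to a block-aligned hint offset, the first fragments encountered may be the tail of a record whose kFirstType fragment lives in an earlier block, so they are dropped silently rather than reported as corruption. Below is a standalone sketch of that state machine (not RocksDB's actual Reader). Note the diff only shows the flag being cleared; presumably it is set to true when the reader is asked to skip ahead, which this page does not show:

```cpp
// Sketch of the skip-tolerant fragment handling implemented above.
enum class FragmentType { kFull, kFirst, kMiddle, kLast };

struct SkipTolerantState {
  bool in_fragmented_record = false;
  // Assumed to be set right after seeking into the middle of a log file.
  bool may_skip_first_fragmented_record = false;

  // Returns true when an out-of-context middle/last fragment should be
  // dropped silently instead of being reported as corruption.
  bool ShouldSilentlyDrop(FragmentType t) {
    if (t == FragmentType::kFull || t == FragmentType::kFirst) {
      // A clean record start: strict corruption checking resumes.
      may_skip_first_fragmented_record = false;
      return false;
    }
    if (!in_fragmented_record && may_skip_first_fragmented_record) {
      if (t == FragmentType::kLast) {
        // The partially-seen record has ended; any later orphan fragment is
        // genuine corruption again.
        may_skip_first_fragmented_record = false;
      }
      return true;
    }
    return false;
  }
};
```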
@@ -19,7 +19,8 @@ TransactionLogIteratorImpl::TransactionLogIteratorImpl(
     const TransactionLogIterator::ReadOptions& read_options,
     const EnvOptions& soptions, const SequenceNumber seq,
     std::unique_ptr<VectorWalPtr> files, VersionSet const* const versions,
-    const bool seq_per_batch, const std::shared_ptr<IOTracer>& io_tracer)
+    const bool seq_per_batch, const std::shared_ptr<IOTracer>& io_tracer,
+    const std::shared_ptr<TransactionLogSeqCache>& transaction_log_seq_cache)
     : dir_(dir),
       options_(options),
       read_options_(read_options),
@@ -32,6 +33,7 @@ TransactionLogIteratorImpl::TransactionLogIteratorImpl(
       started_(false),
       is_valid_(false),
       current_file_index_(0),
+      transaction_log_seq_cache_(transaction_log_seq_cache),
       current_batch_seq_(0),
       current_last_seq_(0) {
   assert(files_ != nullptr);
@@ -113,8 +115,13 @@ void TransactionLogIteratorImpl::SeekToStartSequence(uint64_t start_file_index,
   } else if (!current_status_.ok()) {
     return;
   }
-  Status s =
-      OpenLogReader(files_->at(static_cast<size_t>(start_file_index)).get());
+  auto& file = files_->at(static_cast<size_t>(start_file_index));
+  uint64_t hint_block_index{0};
+  if (read_options_.with_cache_) {
+    transaction_log_seq_cache_->Lookup(
+        file->LogNumber(), starting_sequence_number_, &hint_block_index);
+  }
+  Status s = OpenLogReader(file.get(), hint_block_index * log::kBlockSize);
   if (!s.ok()) {
     current_status_ = s;
     reporter_.Info(current_status_.ToString().c_str());
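The hint stored in the cache is a block index, not a byte offset; the call above converts it by multiplying with log::kBlockSize (RocksDB WAL files are written as fixed 32 KiB blocks). A tiny sketch of that arithmetic, with zero doubling as "no cache hit":

```cpp
#include <cstdint>

// Mirrors log::kBlockSize: RocksDB WAL files are a sequence of fixed
// 32 KiB blocks, so a block index becomes a byte offset by multiplication.
constexpr uint64_t kBlockSize = 32768;

uint64_t HintOffset(uint64_t hint_block_index) {
  // hint_block_index == 0 doubles as "no cache hit": Skip(0) is a no-op and
  // the reader scans from the start of the file, as it did before this PR.
  // e.g. a cached block index of 100 -> byte offset 3,276,800.
  return hint_block_index * kBlockSize;
}
```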
@@ -207,7 +214,13 @@ void TransactionLogIteratorImpl::NextImpl(bool internal) {
     // Open the next file
     if (current_file_index_ < files_->size() - 1) {
       ++current_file_index_;
-      Status s = OpenLogReader(files_->at(current_file_index_).get());
+      auto& file = files_->at(static_cast<size_t>(current_file_index_));
+      uint64_t hint_block_index{0};
+      if (read_options_.with_cache_) {
+        transaction_log_seq_cache_->Lookup(
+            file->LogNumber(), starting_sequence_number_, &hint_block_index);
+      }
+      Status s = OpenLogReader(file.get(), hint_block_index * log::kBlockSize);
       if (!s.ok()) {
         is_valid_ = false;
         current_status_ = s;
@@ -276,12 +289,28 @@ void TransactionLogIteratorImpl::UpdateCurrentWriteBatch(const Slice& record) {
   // currentBatchSeq_ can only change here
   assert(current_last_seq_ <= versions_->LastSequence());

+  if (read_options_.with_cache_) {
+    // cache the mapping of sequence to log block index when seeking to the
+    // start or end sequence
+    if ((current_batch_seq_ <= starting_sequence_number_ &&
+         current_last_seq_ >= starting_sequence_number_) ||
+        current_last_seq_ == versions_->LastSequence()) {
+      transaction_log_seq_cache_->Insert(
+          current_log_reader_->GetLogNumber(), current_batch_seq_,
+          current_log_reader_->LastRecordOffset() / log::kBlockSize);
+    }
+  }
+
   TEST_SYNC_POINT_CALLBACK("UpdateCurrentWriteBatch:TransactionLogIteratorImpl",
                            current_log_reader_.get());

   current_batch_ = std::move(batch);
   is_valid_ = true;
   current_status_ = Status::OK();
 }

-Status TransactionLogIteratorImpl::OpenLogReader(const WalFile* log_file) {
+Status TransactionLogIteratorImpl::OpenLogReader(const WalFile* log_file,
+                                                 uint64_t hint_offset) {
   std::unique_ptr<SequentialFileReader> file;
   Status s = OpenLogFile(log_file, &file);
   if (!s.ok()) {

Review comment on the caching condition: What's the use case to cache for the start sequence?
Author reply: When there are multiple slaves catching up, the start sequence may not necessarily be incremental; also, there may be scenarios that do not iterate to the end sequence but call GetUpdatesSince frequently. Caching at the beginning and end sounds reasonable to me.
@@ -291,6 +320,11 @@ Status TransactionLogIteratorImpl::OpenLogReader(const WalFile* log_file) {
   current_log_reader_.reset(
       new log::Reader(options_->info_log, std::move(file), &reporter_,
                       read_options_.verify_checksums_, log_file->LogNumber()));
+  if (hint_offset > 0) {
+    TEST_SYNC_POINT_CALLBACK("TransactionLogIteratorImpl:OpenLogReader:Skip",
+                             &hint_offset);
+    return current_log_reader_->Skip(hint_offset);
+  }
   return Status::OK();
 }
 }  // namespace ROCKSDB_NAMESPACE
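For context, this is roughly how a consumer exercises this path. The sketch uses only the public API (DB::GetUpdatesSince and TransactionLogIterator); how the new with_cache_ read option gets enabled is not shown in this diff, so it is omitted here:

```cpp
#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/transaction_log.h"

// Tail the WAL from `since`, the way a catching-up replica would. With this
// PR, repeated seeks to nearby start sequences can reuse the cached
// sequence -> block mapping instead of re-scanning each WAL file from the
// beginning.
rocksdb::Status TailUpdates(rocksdb::DB* db, rocksdb::SequenceNumber since) {
  std::unique_ptr<rocksdb::TransactionLogIterator> iter;
  rocksdb::Status s = db->GetUpdatesSince(since, &iter);
  if (!s.ok()) {
    return s;
  }
  for (; iter->Valid(); iter->Next()) {
    rocksdb::BatchResult batch = iter->GetBatch();
    // Apply batch.writeBatchPtr to the replica and record batch.sequence so
    // the next call starts from where this one stopped.
  }
  return iter->status();
}
```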
Review comment on the new test: This test would pass before this change too. Maybe check that the cache is effective by checking some IO statistics.
Author reply: updated it, thx for review
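One way to act on that suggestion, sketched against the TransactionLogIteratorImpl:OpenLogReader:Skip sync point added in this diff. SyncPoint is RocksDB's debug-build test hook; the PR's actual test code is not shown on this page:

```cpp
#include <cstdint>

#include "test_util/sync_point.h"

// Debug-build-only sketch: observe the skip via the sync point added above.
static uint64_t observed_skip_offset = 0;

void InstallSkipObserver() {
  auto* sp = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sp->SetCallBack("TransactionLogIteratorImpl:OpenLogReader:Skip",
                  [](void* arg) {
                    // `arg` is the &hint_offset passed by OpenLogReader.
                    observed_skip_offset = *static_cast<uint64_t*>(arg);
                  });
  sp->EnableProcessing();
}

// A test would call GetUpdatesSince() twice with the same start sequence and
// assert observed_skip_offset > 0 on the second call, i.e. the cache hit
// actually skipped ahead rather than reading the file from offset 0.
```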