Skip to content

Commit

Permalink
librbd/cache/pwl/ssd: solve competition between read and retire
Browse files Browse the repository at this point in the history
SSD read is not like rwl's. SSD need aio read. Therefore,
we cannot guarantee that the data will not be retire
during the period from sending the read request to the SSD
and receiving the data to the memory, which may cause
the corresponding data on the SSD to be overwritten.

Fixes: https://tracker.ceph.com/issues/52236
Signed-off-by: Feng Hualong <hualong.feng@intel.com>
  • Loading branch information
hualongfeng authored and idryomov committed Aug 11, 2021
1 parent 0dedabe commit dfdb452
Show file tree
Hide file tree
Showing 11 changed files with 44 additions and 32 deletions.
2 changes: 1 addition & 1 deletion src/librbd/cache/pwl/AbstractWriteLog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -691,7 +691,7 @@ void AbstractWriteLog<I>::read(Extents&& image_extents,
bl->clear();
m_perfcounter->inc(l_librbd_pwl_rd_req, 1);

std::vector<WriteLogCacheEntry*> log_entries_to_read;
std::vector<std::shared_ptr<GenericWriteLogEntry>> log_entries_to_read;
std::vector<bufferlist*> bls_to_read;

m_async_op_tracker.start_op();
Expand Down
4 changes: 2 additions & 2 deletions src/librbd/cache/pwl/AbstractWriteLog.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,11 +366,11 @@ class AbstractWriteLog {
pwl::DeferredContexts &later) = 0;
virtual void collect_read_extents(
uint64_t read_buffer_offset, LogMapEntry<GenericWriteLogEntry> map_entry,
std::vector<WriteLogCacheEntry*> &log_entries_to_read,
std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
std::vector<bufferlist*> &bls_to_read, uint64_t entry_hit_length,
Extent hit_extent, pwl::C_ReadRequest *read_ctx) = 0;
virtual void complete_read(
std::vector<WriteLogCacheEntry*> &log_entries_to_read,
std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
std::vector<bufferlist*> &bls_to_read, Context *ctx) = 0;
virtual void write_data_to_buffer(
std::shared_ptr<pwl::WriteLogEntry> ws_entry,
Expand Down
8 changes: 0 additions & 8 deletions src/librbd/cache/pwl/LogEntry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,6 @@ void WriteLogEntry::init(bool has_data,
ram_entry.discard = 0;
}

unsigned int WriteLogEntry::reader_count() const {
if (cache_bp.have_raw()) {
return (cache_bp.raw_nref() - bl_refs - 1);
} else {
return 0;
}
}

std::ostream& WriteLogEntry::format(std::ostream &os) const {
os << "(Write) ";
GenericWriteLogEntry::format(os);
Expand Down
2 changes: 1 addition & 1 deletion src/librbd/cache/pwl/LogEntry.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ class WriteLogEntry : public GenericWriteLogEntry {
virtual buffer::list &get_cache_bl() = 0;

BlockExtent block_extent();
unsigned int reader_count() const;
virtual unsigned int reader_count() const = 0;
/* Constructs a new bl containing copies of cache_bp */
void copy_cache_bl(bufferlist *out_bl) override {};
bool can_retire() const override {
Expand Down
8 changes: 8 additions & 0 deletions src/librbd/cache/pwl/rwl/LogEntry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,14 @@ void WriteLogEntry::copy_cache_bl(bufferlist *out_bl) {
this->init_bl(cloned_bp, *out_bl);
}

unsigned int WriteLogEntry::reader_count() const {
if (cache_bp.have_raw()) {
return (cache_bp.raw_nref() - bl_refs - 1);
} else {
return 0;
}
}

void WriteSameLogEntry::writeback(
librbd::cache::ImageWritebackInterface &image_writeback, Context *ctx) {
bufferlist entry_bl;
Expand Down
1 change: 1 addition & 0 deletions src/librbd/cache/pwl/rwl/LogEntry.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class WriteLogEntry : public pwl::WriteLogEntry {
std::vector<WriteBufferAllocation>::iterator allocation) override;
buffer::list &get_cache_bl() override;
void copy_cache_bl(bufferlist *out_bl) override;
unsigned int reader_count() const override;
};

class WriteSameLogEntry : public WriteLogEntry {
Expand Down
4 changes: 2 additions & 2 deletions src/librbd/cache/pwl/rwl/WriteLog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ WriteLog<I>::~WriteLog() {
template <typename I>
void WriteLog<I>::collect_read_extents(
uint64_t read_buffer_offset, LogMapEntry<GenericWriteLogEntry> map_entry,
std::vector<WriteLogCacheEntry*> &log_entries_to_read,
std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
std::vector<bufferlist*> &bls_to_read, uint64_t entry_hit_length,
Extent hit_extent, pwl::C_ReadRequest *read_ctx) {
/* Make a bl for this hit extent. This will add references to the
Expand All @@ -82,7 +82,7 @@ void WriteLog<I>::collect_read_extents(

template <typename I>
void WriteLog<I>::complete_read(
std::vector<WriteLogCacheEntry*> &log_entries_to_read,
std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
std::vector<bufferlist*> &bls_to_read, Context *ctx) {
ctx->complete(0);
}
Expand Down
4 changes: 2 additions & 2 deletions src/librbd/cache/pwl/rwl/WriteLog.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,11 @@ class WriteLog : public AbstractWriteLog<ImageCtxT> {
bool &alloc_succeeds, bool &no_space) override;
void collect_read_extents(
uint64_t read_buffer_offset, LogMapEntry<GenericWriteLogEntry> map_entry,
std::vector<WriteLogCacheEntry*> &log_entries_to_read,
std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
std::vector<bufferlist*> &bls_to_read, uint64_t entry_hit_length,
Extent hit_extent, pwl::C_ReadRequest *read_ctx) override;
void complete_read(
std::vector<WriteLogCacheEntry*> &log_entries_to_read,
std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
std::vector<bufferlist*> &bls_to_read, Context *ctx) override;
bool retire_entries(const unsigned long int frees_per_tx) override;
void persist_last_flushed_sync_gen() override;
Expand Down
5 changes: 5 additions & 0 deletions src/librbd/cache/pwl/ssd/LogEntry.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ class WriteLogEntry : public pwl::WriteLogEntry {
buffer::list &get_cache_bl() override;
void remove_cache_bl() override;
unsigned int get_aligned_data_size() const override;
void inc_bl_refs() { bl_refs++; };
void dec_bl_refs() { bl_refs--; };
unsigned int reader_count() const override {
return bl_refs;
}
};

class WriteSameLogEntry : public WriteLogEntry {
Expand Down
28 changes: 17 additions & 11 deletions src/librbd/cache/pwl/ssd/WriteLog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ WriteLog<I>::~WriteLog() {
template <typename I>
void WriteLog<I>::collect_read_extents(
uint64_t read_buffer_offset, LogMapEntry<GenericWriteLogEntry> map_entry,
std::vector<WriteLogCacheEntry*> &log_entries_to_read,
std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
std::vector<bufferlist*> &bls_to_read,
uint64_t entry_hit_length, Extent hit_extent,
pwl::C_ReadRequest *read_ctx) {
Expand All @@ -87,14 +87,15 @@ void WriteLog<I>::collect_read_extents(
if (!hit_bl.length()) {
ldout(m_image_ctx.cct, 5) << "didn't hit RAM" << dendl;
auto read_extent = read_ctx->read_extents.back();
log_entries_to_read.push_back(&write_entry->ram_entry);
write_entry->inc_bl_refs();
log_entries_to_read.push_back(std::move(write_entry));
bls_to_read.push_back(&read_extent->m_bl);
}
}

template <typename I>
void WriteLog<I>::complete_read(
std::vector<WriteLogCacheEntry*> &log_entries_to_read,
std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
std::vector<bufferlist*> &bls_to_read,
Context *ctx) {
if (!log_entries_to_read.empty()) {
Expand Down Expand Up @@ -510,7 +511,9 @@ Context* WriteLog<I>::construct_flush_entry_ctx(
});
ctx = new LambdaContext(
[this, log_entry, read_bl_ptr, ctx](int r) {
aio_read_data_block(&log_entry->ram_entry, read_bl_ptr, ctx);
auto write_entry = static_pointer_cast<WriteLogEntry>(log_entry);
write_entry->inc_bl_refs();
aio_read_data_block(std::move(write_entry), read_bl_ptr, ctx);
});
return ctx;
} else {
Expand Down Expand Up @@ -957,37 +960,40 @@ int WriteLog<I>::update_pool_root_sync(
}

template <typename I>
void WriteLog<I>::aio_read_data_block(WriteLogCacheEntry *log_entry,
void WriteLog<I>::aio_read_data_block(std::shared_ptr<GenericWriteLogEntry> log_entry,
bufferlist *bl, Context *ctx) {
std::vector<WriteLogCacheEntry*> log_entries {log_entry};
std::vector<std::shared_ptr<GenericWriteLogEntry>> log_entries = {std::move(log_entry)};
std::vector<bufferlist *> bls {bl};
aio_read_data_blocks(log_entries, bls, ctx);
}

template <typename I>
void WriteLog<I>::aio_read_data_blocks(
std::vector<WriteLogCacheEntry*> &log_entries,
std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries,
std::vector<bufferlist *> &bls, Context *ctx) {
ceph_assert(log_entries.size() == bls.size());

//get the valid part
Context *read_ctx = new LambdaContext(
[this, log_entries, bls, ctx](int r) {
[log_entries, bls, ctx](int r) {
for (unsigned int i = 0; i < log_entries.size(); i++) {
bufferlist valid_data_bl;
auto length = log_entries[i]->is_write() ? log_entries[i]->write_bytes :
log_entries[i]->ws_datalen;
auto write_entry = static_pointer_cast<WriteLogEntry>(log_entries[i]);
auto length = write_entry->ram_entry.is_write() ? write_entry->ram_entry.write_bytes
: write_entry->ram_entry.ws_datalen;

valid_data_bl.substr_of(*bls[i], 0, length);
bls[i]->clear();
bls[i]->append(valid_data_bl);
write_entry->dec_bl_refs();
}
ctx->complete(r);
});

CephContext *cct = m_image_ctx.cct;
AioTransContext *aio = new AioTransContext(cct, read_ctx);
for (unsigned int i = 0; i < log_entries.size(); i++) {
auto log_entry = log_entries[i];
WriteLogCacheEntry *log_entry = &log_entries[i]->ram_entry;

ceph_assert(log_entry->is_write() || log_entry->is_writesame());
uint64_t len = log_entry->is_write() ? log_entry->write_bytes :
Expand Down
10 changes: 5 additions & 5 deletions src/librbd/cache/pwl/ssd/WriteLog.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,11 @@ class WriteLog : public AbstractWriteLog<ImageCtxT> {
void load_existing_entries(pwl::DeferredContexts &later);
void collect_read_extents(
uint64_t read_buffer_offset, LogMapEntry<GenericWriteLogEntry> map_entry,
std::vector<WriteLogCacheEntry*> &log_entries_to_read,
std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
std::vector<bufferlist*> &bls_to_read, uint64_t entry_hit_length,
Extent hit_extent, pwl::C_ReadRequest *read_ctx) override;
void complete_read(
std::vector<WriteLogCacheEntry*> &log_entries_to_read,
std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
std::vector<bufferlist*> &bls_to_read, Context *ctx) override;
void enlist_op_appender();
bool retire_entries(const unsigned long int frees_per_tx);
Expand All @@ -132,9 +132,9 @@ class WriteLog : public AbstractWriteLog<ImageCtxT> {
int update_pool_root_sync(std::shared_ptr<pwl::WriteLogPoolRoot> root);
void update_pool_root(std::shared_ptr<WriteLogPoolRoot> root,
AioTransContext *aio);
void aio_read_data_block(WriteLogCacheEntry *log_entry, bufferlist *bl,
Context *ctx);
void aio_read_data_blocks(std::vector<WriteLogCacheEntry*> &log_entries,
void aio_read_data_block(std::shared_ptr<GenericWriteLogEntry> log_entry,
bufferlist *bl, Context *ctx);
void aio_read_data_blocks(std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries,
std::vector<bufferlist *> &bls, Context *ctx);
static void aio_cache_cb(void *priv, void *priv2) {
AioTransContext *c = static_cast<AioTransContext*>(priv2);
Expand Down

0 comments on commit dfdb452

Please sign in to comment.