From 423ff38142bb84fbc4eed4174b532d569bbd6a44 Mon Sep 17 00:00:00 2001 From: Ravi Nagarjun Akella Date: Sat, 3 May 2025 20:34:35 -0700 Subject: [PATCH 1/8] Add framework for the io operations in volume manager --- src/lib/homeblks_impl.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/lib/homeblks_impl.hpp b/src/lib/homeblks_impl.hpp index 98941d6..993373f 100644 --- a/src/lib/homeblks_impl.hpp +++ b/src/lib/homeblks_impl.hpp @@ -115,6 +115,18 @@ class HomeBlocksImpl : public HomeBlocks, public VolumeManager, public std::enab // with part_of_batchis set to true. void submit_io_batch() final; + + NullAsyncResult write(const VolumePtr& vol, const vol_interface_req_ptr& req, + bool part_of_batch = false) final; + + NullAsyncResult read(const VolumePtr& vol, const vol_interface_req_ptr& req, + bool part_of_batch = false) final; + + NullAsyncResult unmap(const VolumePtr& vol, const vol_interface_req_ptr& req) final; + + void submit_io_batch() final; + + // see api comments in base class; bool get_stats(volume_id_t id, VolumeStats& stats) const final; void get_volume_ids(std::vector< volume_id_t >& vol_ids) const final; From f3f9d8762329cd6a58cad46d86d0c99271889844 Mon Sep 17 00:00:00 2001 From: Ravi Nagarjun Akella Date: Thu, 8 May 2025 11:30:00 -0700 Subject: [PATCH 2/8] read API implementation --- src/include/homeblks/volume_mgr.hpp | 11 +- src/lib/homeblks_impl.hpp | 18 +--- src/lib/index.cpp | 14 ++- src/lib/tests/CMakeLists.txt | 2 +- src/lib/volume/index.hpp | 9 +- src/lib/volume/tests/test_common.hpp | 8 +- src/lib/volume/tests/test_volume.cpp | 6 +- src/lib/volume/tests/test_volume_io.cpp | 80 ++++++++++++++- src/lib/volume_mgr.cpp | 128 ++++++++++++++++++++---- 9 files changed, 228 insertions(+), 48 deletions(-) diff --git a/src/include/homeblks/volume_mgr.hpp b/src/include/homeblks/volume_mgr.hpp index 12c7b22..86784d2 100644 --- a/src/include/homeblks/volume_mgr.hpp +++ b/src/include/homeblks/volume_mgr.hpp @@ -36,6 +36,7 @@ struct 
vol_interface_req : public sisl::ObjLifeCounter< vol_interface_req > { virtual ~vol_interface_req() = default; // override; sisl::ObjLifeCounter should have virtual destructor virtual void free_yourself() { delete this; } + lba_t end_lba() const { return lba + nlbas - 1; } }; using vol_interface_req_ptr = boost::intrusive_ptr< vol_interface_req >; @@ -95,14 +96,13 @@ class VolumeManager : public Manager< VolumeError > { * * @param vol Pointer to the volume * @param req Request created which contains all the write parameters - * @param part_of_batch Is this request part of a batch request. If so, implementation can wait for batch_submit + * req.part_of_batch field can be used if this request is part of a batch request. If so, implementation can wait for batch_submit * call before issuing the writes. IO might already be started or even completed (in case of errors) before * batch_sumbit call, so application cannot assume IO will be started only after submit_batch call. * * @return std::error_condition no_error or error in issuing writes */ - virtual NullAsyncResult write(const VolumePtr& vol, const vol_interface_req_ptr& req, - bool part_of_batch = false) = 0; + virtual NullAsyncResult write(const VolumePtr& vol, const vol_interface_req_ptr& req) = 0; /** * @brief Read the data from the volume asynchronously, created from the request. After completion the attached @@ -110,14 +110,13 @@ class VolumeManager : public Manager< VolumeError > { * * @param vol Pointer to the volume * @param req Request created which contains all the read parameters - * @param part_of_batch Is this request part of a batch request. If so, implementation can wait for batch_submit + * req.part_of_batch field can be used if this request is part of a batch request. If so, implementation can wait for batch_submit * call before issuing the reads. 
IO might already be started or even completed (in case of errors) before * batch_sumbit call, so application cannot assume IO will be started only after submit_batch call. * * @return std::error_condition no_error or error in issuing reads */ - virtual NullAsyncResult read(const VolumePtr& vol, const vol_interface_req_ptr& req, - bool part_of_batch = false) = 0; + virtual NullAsyncResult read(const VolumePtr& vol, const vol_interface_req_ptr& req) = 0; /** * @brief unmap the given block range diff --git a/src/lib/homeblks_impl.hpp b/src/lib/homeblks_impl.hpp index 993373f..aac9602 100644 --- a/src/lib/homeblks_impl.hpp +++ b/src/lib/homeblks_impl.hpp @@ -105,9 +105,9 @@ class HomeBlocksImpl : public HomeBlocks, public VolumeManager, public std::enab VolumePtr lookup_volume(const volume_id_t& id) final; - NullAsyncResult write(const VolumePtr& vol, const vol_interface_req_ptr& req, bool part_of_batch = false) final; + NullAsyncResult write(const VolumePtr& vol, const vol_interface_req_ptr& req) final; - NullAsyncResult read(const VolumePtr& vol, const vol_interface_req_ptr& req, bool part_of_batch = false) final; + NullAsyncResult read(const VolumePtr& vol, const vol_interface_req_ptr& req) final; NullAsyncResult unmap(const VolumePtr& vol, const vol_interface_req_ptr& req) final; @@ -115,18 +115,6 @@ class HomeBlocksImpl : public HomeBlocks, public VolumeManager, public std::enab // with part_of_batchis set to true. 
void submit_io_batch() final; - - NullAsyncResult write(const VolumePtr& vol, const vol_interface_req_ptr& req, - bool part_of_batch = false) final; - - NullAsyncResult read(const VolumePtr& vol, const vol_interface_req_ptr& req, - bool part_of_batch = false) final; - - NullAsyncResult unmap(const VolumePtr& vol, const vol_interface_req_ptr& req) final; - - void submit_io_batch() final; - - // see api comments in base class; bool get_stats(volume_id_t id, VolumeStats& stats) const final; void get_volume_ids(std::vector< volume_id_t >& vol_ids) const final; @@ -162,6 +150,8 @@ class HomeBlocksImpl : public HomeBlocks, public VolumeManager, public std::enab VolumeManager::Result< folly::Unit > write_to_index(const VolumePtr& vol_ptr, lba_t start_lba, lba_t end_lba, std::unordered_map< lba_t, BlockInfo >& blocks_info); + VolumeManager::Result< folly::Unit > read_from_index(const VolumePtr& vol_ptr, const vol_interface_req_ptr& req, + std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > >& out_vector); }; class HBIndexSvcCB : public homestore::IndexServiceCallbacks { diff --git a/src/lib/index.cpp b/src/lib/index.cpp index 093f8d1..459cb5d 100644 --- a/src/lib/index.cpp +++ b/src/lib/index.cpp @@ -33,7 +33,7 @@ HomeBlocksImpl::write_to_index(const VolumePtr& vol_ptr, lba_t start_lba, lba_t // For value shift() will get the blk_num and checksum for each lba. 
IndexValueContext app_ctx{&blocks_info, start_lba}; const BlkId& start_blkid = blocks_info[start_lba].new_blkid; - VolumeIndexValue value{start_blkid}; + VolumeIndexValue value{start_blkid, blocks_info[start_lba].checksum}; auto req = homestore::BtreeRangePutRequest< VolumeIndexKey >{ homestore::BtreeKeyRange< VolumeIndexKey >{VolumeIndexKey{start_lba}, true, VolumeIndexKey{end_lba}, true}, @@ -51,4 +51,16 @@ HomeBlocksImpl::write_to_index(const VolumePtr& vol_ptr, lba_t start_lba, lba_t return folly::Unit(); } +VolumeManager::Result< folly::Unit > HomeBlocksImpl::read_from_index(const VolumePtr& vol_ptr, const vol_interface_req_ptr& req, + std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > >& out_vector) { + homestore::BtreeQueryRequest< VolumeIndexKey > qreq{homestore::BtreeKeyRange< VolumeIndexKey >{VolumeIndexKey{req->lba}, + VolumeIndexKey{req->end_lba()}}, homestore::BtreeQueryType::SWEEP_NON_INTRUSIVE_PAGINATION_QUERY}; + auto index_table = vol_ptr->indx_table(); + RELEASE_ASSERT(index_table != nullptr, "Index table is null for volume id: {}", boost::uuids::to_string(vol_ptr->id())); + if (auto ret = index_table->query(qreq, out_vector); ret != homestore::btree_status_t::success) { + return folly::makeUnexpected(VolumeError::INDEX_ERROR); + } + return folly::Unit(); +} + } // namespace homeblocks \ No newline at end of file diff --git a/src/lib/tests/CMakeLists.txt b/src/lib/tests/CMakeLists.txt index ada0cf8..29f85bd 100644 --- a/src/lib/tests/CMakeLists.txt +++ b/src/lib/tests/CMakeLists.txt @@ -6,4 +6,4 @@ target_sources(test_fixture PRIVATE ) target_link_libraries(test_fixture ${COMMON_TEST_DEPS} -) +) \ No newline at end of file diff --git a/src/lib/volume/index.hpp b/src/lib/volume/index.hpp index bccdeb8..bc3aac3 100644 --- a/src/lib/volume/index.hpp +++ b/src/lib/volume/index.hpp @@ -188,15 +188,18 @@ class VolumeIndexValue : public homestore::BtreeIntervalValue { #pragma pack() public: - VolumeIndexValue(const BlkId& base_blkid) : 
homestore::BtreeIntervalValue() { + VolumeIndexValue(const BlkId& base_blkid, homestore::csum_t csum) : homestore::BtreeIntervalValue() { m_blkid_suffix = uint32_cast(base_blkid.to_integer() & 0xFFFFFFFF) >> 1; m_blkid_prefix = uint32_cast(base_blkid.to_integer() >> 32); + m_checksum = csum; } + VolumeIndexValue(const BlkId& base_blkid) : VolumeIndexValue(base_blkid, 0) {} VolumeIndexValue() = default; VolumeIndexValue(const VolumeIndexValue& other) : homestore::BtreeIntervalValue(), m_blkid_prefix(other.m_blkid_prefix), - m_blkid_suffix(other.m_blkid_suffix) {} + m_blkid_suffix(other.m_blkid_suffix), + m_checksum(other.m_checksum) {} VolumeIndexValue(const sisl::blob& b, bool copy) : homestore::BtreeIntervalValue() { this->deserialize(b, copy); } virtual ~VolumeIndexValue() = default; @@ -280,4 +283,4 @@ class VolumeIndexValue : public homestore::BtreeIntervalValue { (m_checksum == other.m_checksum)); } }; -} // namespace homeblocks +} // namespace homeblocks \ No newline at end of file diff --git a/src/lib/volume/tests/test_common.hpp b/src/lib/volume/tests/test_common.hpp index 1859c2d..730c680 100644 --- a/src/lib/volume/tests/test_common.hpp +++ b/src/lib/volume/tests/test_common.hpp @@ -213,6 +213,12 @@ class HBTestHelper { } } + static void validate_zeros(uint8_t const* buf, uint64_t size) { + sisl::io_blob_safe blob(size, 512); + std::memset(blob.bytes(), 0, size); + RELEASE_ASSERT_EQ(std::memcmp(buf, blob.bytes(), size), 0, "data_buf mismatch"); + } + private: void init_devices(bool is_file, uint64_t dev_size = 0) { if (is_file) { @@ -277,4 +283,4 @@ class HBTestHelper { Waiter waiter_; }; -} // namespace test_common +} // namespace test_common \ No newline at end of file diff --git a/src/lib/volume/tests/test_volume.cpp b/src/lib/volume/tests/test_volume.cpp index 32d9907..eb76254 100644 --- a/src/lib/volume/tests/test_volume.cpp +++ b/src/lib/volume/tests/test_volume.cpp @@ -26,8 +26,8 @@ SISL_LOGGING_INIT(HOMEBLOCKS_LOG_MODS) 
SISL_OPTION_GROUP(test_volume_setup, - (num_vols, "", "num_vols", "number of volumes", ::cxxopts::value< uint32_t >()->default_value("2"), - "number")); + (num_vols, "", "num_vols", "number of volumes", ::cxxopts::value< uint32_t >()->default_value("2"), + "number")); SISL_OPTIONS_ENABLE(logging, test_common_setup, test_volume_setup, homeblocks) SISL_LOGGING_DECL(test_volume) @@ -193,4 +193,4 @@ int main(int argc, char* argv[]) { g_helper->teardown(); return ret; -} +} \ No newline at end of file diff --git a/src/lib/volume/tests/test_volume_io.cpp b/src/lib/volume/tests/test_volume_io.cpp index b134edf..bd8fd9c 100644 --- a/src/lib/volume/tests/test_volume_io.cpp +++ b/src/lib/volume/tests/test_volume_io.cpp @@ -174,6 +174,7 @@ class VolumeIOImpl { }); } +<<<<<<< HEAD void verify_all_data() { for (auto& [lba, data_pattern] : m_lba_data) { auto buffer = iomanager.iobuf_alloc(512, 4096); @@ -185,7 +186,46 @@ class VolumeIOImpl { LOGDEBUG("Verify data vol={} lba={} pattern={} {}", m_vol_name, lba, data_pattern, *r_cast< uint64_t* >(buffer)); iomanager.iobuf_free(buffer); +======= + void read_and_verify(lba_t start_lba, uint32_t nlbas) { + auto sz = nlbas * m_vol_ptr->info()->page_size; + sisl::io_blob_safe read_blob(sz, 512); + auto buf = read_blob.bytes(); + vol_interface_req_ptr req(new vol_interface_req{buf, start_lba, nlbas}); + auto read_resp = g_helper->inst()->volume_manager()->read(m_vol_ptr, req).get(); + if(read_resp.hasError()) { + LOGERROR("Read failed with error={}", read_resp.error()); } + RELEASE_ASSERT(!read_resp.hasError(), "Read failed with error={}", read_resp.error()); + auto read_sz = m_vol_ptr->info()->page_size; + for(auto lba = start_lba; lba < start_lba + nlbas; lba++, buf += read_sz) { + uint64_t data_pattern = 0; + if(auto it = m_lba_data.find(lba); it != m_lba_data.end()) { + data_pattern = it->second; + test_common::HBTestHelper::validate_data_buf(buf, m_vol_ptr->info()->page_size, data_pattern); + } else { + 
test_common::HBTestHelper::validate_zeros(buf, m_vol_ptr->info()->page_size); + } + + LOGDEBUG("Verify data lba={} pattern expected={} actual={}", lba, data_pattern, *r_cast< uint64_t* >(read_blob.bytes())); + } + } + + void verify_data(uint64_t nlbas_per_io = 1) { + auto start_lba = m_lba_data.begin()->first; + auto max_lba = m_lba_data.rbegin()->first; + verify_data(start_lba, max_lba, nlbas_per_io); + } + + void verify_data(lba_t start_lba, lba_t max_lba, uint64_t nlbas_per_io) { + uint64_t num_lbas_verified = 0; + for(auto lba = start_lba; lba < max_lba; lba += nlbas_per_io) { + auto num_lbas_this_round = std::min(nlbas_per_io, max_lba - lba); + read_and_verify(lba, num_lbas_this_round); + num_lbas_verified += num_lbas_this_round; +>>>>>>> read API implementation + } + LOGINFO("Verified {} lbas for volume {}", num_lbas_verified, m_vol_ptr->info()->name); } #ifdef _PRERELEASE @@ -238,7 +278,6 @@ class VolumeIOTest : public ::testing::Test { // Get a random volume. vol = m_vols_impl[rand() % m_vols_impl.size()]; } - vol->generate_io(start_lba, nblks); }); @@ -246,13 +285,20 @@ class VolumeIOTest : public ::testing::Test { LOGINFO("IO completed"); } +<<<<<<< HEAD void verify_all_data(shared< VolumeIOImpl > vol_impl = nullptr) { if (vol_impl) { vol_impl->verify_all_data(); +======= + void verify_data(shared< VolumeIOImpl > vol_impl = nullptr, uint64_t nlbas_per_io = 1) { + if (vol_impl) { + vol_impl->verify_data(nlbas_per_io); +>>>>>>> read API implementation return; } for (auto& vol_impl : m_vols_impl) { +<<<<<<< HEAD vol_impl->verify_all_data(); } } @@ -261,6 +307,9 @@ class VolumeIOTest : public ::testing::Test { g_helper->restart(shutdown_delay); for (auto& vol_impl : m_vols_impl) { vol_impl->reset(); +======= + vol_impl->verify_data(nlbas_per_io); +>>>>>>> read API implementation } } @@ -286,18 +335,47 @@ TEST_F(VolumeIOTest, SingleVolumeWriteData) { restart(5); LOGINFO("Verify data"); +<<<<<<< HEAD verify_all_data(vol); +======= + verify_data(vol, 30 /* 
nlbas_per_io */); +>>>>>>> read API implementation // Write and verify again on same LBA range to single volume multiple times. LOGINFO("Write and verify data with num_iter={} start={} nblks={}", num_iter, start_lba, nblks); for (uint32_t i = 0; i < num_iter; i++) { generate_io_single(vol, start_lba, nblks); } +<<<<<<< HEAD verify_all_data(vol); +======= + verify_data(vol); + + // verify random lba ranges + +>>>>>>> read API implementation LOGINFO("SingleVolumeWriteData test done."); } +TEST_F(VolumeIOTest, SingleVolumeReadData) { + // Write and verify fixed LBA range to single volume multiple times. + auto vol = volume_list().back(); + uint32_t nblks = 500; + lba_t start_lba = 1000; + uint32_t num_iter = 1; + LOGINFO("Write and verify data with num_iter={} start={} nblks={}", num_iter, start_lba, nblks); + for (uint32_t i = 0; i < num_iter; i++) { + generate_io_single(vol, start_lba, nblks); + } + + vol->verify_data(300, 800, 40); + vol->verify_data(2000, 3000, 40); + vol->verify_data(800, 1800, 40); + + LOGINFO("SingleVolumeReadHoles test done."); +} + TEST_F(VolumeIOTest, MultipleVolumeWriteData) { LOGINFO("Write data randomly on num_vols={} num_io={}", SISL_OPTIONS["num_vols"].as< uint32_t >(), SISL_OPTIONS["num_io"].as< uint64_t >()); diff --git a/src/lib/volume_mgr.cpp b/src/lib/volume_mgr.cpp index ed8e79a..7e2537a 100644 --- a/src/lib/volume_mgr.cpp +++ b/src/lib/volume_mgr.cpp @@ -21,6 +21,28 @@ namespace homeblocks { +static VolumeError to_volume_error(std::error_code ec) { + switch (ec.value()) { + default: + return VolumeError::UNKNOWN; + } +} + +static void submit_read_to_backend(uint8_t*& read_buf, std::vector< folly::Future< std::error_code > >& futs, + const std::pair< VolumeIndexKey, VolumeIndexValue >& first_blk_in_contiguous_range, uint32_t blk_count, + const VolumePtr& vol, bool part_of_batch) { + // construct the blkid + auto blk_num = first_blk_in_contiguous_range.second.blkid().blk_num(); + auto chunk_num = 
first_blk_in_contiguous_range.second.blkid().chunk_num(); + auto blkid = homestore::MultiBlkId(blk_num, blk_count, chunk_num); + sisl::sg_list sgs; + auto size = blk_count * vol->rd()->get_blk_size(); + sgs.size = size; + sgs.iovs.emplace_back(iovec{.iov_base = read_buf, .iov_len = size}); + read_buf += size; + futs.emplace_back(vol->rd()->async_read(blkid, sgs, size, part_of_batch)); +} + std::shared_ptr< VolumeManager > HomeBlocksImpl::volume_manager() { return shared_from_this(); } void HomeBlocksImpl::on_vol_meta_blk_found(sisl::byte_view const& buf, void* cookie) { @@ -134,8 +156,7 @@ bool HomeBlocksImpl::get_stats(volume_id_t id, VolumeStats& stats) const { retur void HomeBlocksImpl::get_volume_ids(std::vector< volume_id_t >& vol_ids) const {} -VolumeManager::NullAsyncResult HomeBlocksImpl::write(const VolumePtr& vol_ptr, const vol_interface_req_ptr& vol_req, - bool part_of_batch) { +VolumeManager::NullAsyncResult HomeBlocksImpl::write(const VolumePtr& vol_ptr, const vol_interface_req_ptr& vol_req) { // Step 1. Allocate new blkids. Homestore might return multiple blkid's pointing // to different contigious memory locations. @@ -152,7 +173,7 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::write(const VolumePtr& vol_ptr, c data_sgs.iovs.emplace_back(iovec{.iov_base = vol_req->buffer, .iov_len = data_size}); data_sgs.size = data_size; return vol_ptr->rd() - ->async_write(new_blkids, data_sgs, part_of_batch) + ->async_write(new_blkids, data_sgs, vol_req->part_of_batch) .thenValue([this, vol_ptr, vol_req, new_blkids = std::move(new_blkids)](auto&& result) -> VolumeManager::NullAsyncResult { if (result) { return folly::makeUnexpected(VolumeError::DRIVE_WRITE_ERROR); } @@ -234,22 +255,93 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::write(const VolumePtr& vol_ptr, c }); } -VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol_ptr, const vol_interface_req_ptr& vol_req, - bool part_of_batch) { - // TODO remove when read is merged. 
- auto data_size = vol_req->nlbas * vol_ptr->rd()->get_blk_size(); - sisl::sg_list data_sgs; - data_sgs.iovs.emplace_back(iovec{.iov_base = vol_req->buffer, .iov_len = data_size}); - data_sgs.size = data_size; +VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol, const vol_interface_req_ptr& req) { + // TODO: check if the system is accepting ios (shutdown in progress etc) + RELEASE_ASSERT(vol != nullptr, "VolumePtr is null"); + // Step 1: get the blk ids from index table + std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > > out_vector; + if(auto index_resp = read_from_index(vol, req, out_vector); index_resp.hasError()) { + LOGE("Failed to read from index table for range=[{}, {}], volume id: {}, error: {}", + req->lba, req->end_lba(), boost::uuids::to_string(vol->id()), index_resp.error()); + return index_resp; + } + + // Step 2: Consolidate the blk ids and issue read requests + std::vector< folly::Future< std::error_code > > futs; + auto* read_buf = req->buffer; + DEBUG_ASSERT(read_buf != nullptr, "Read buffer is null"); + auto cur_lba = req->lba; + for (uint32_t i = 0, blk_count = 0, start_idx = 0; i < out_vector.size(); ++i, ++cur_lba) { + auto const& [key, value] = out_vector[i]; + // cur_lba is used to keep track of the holes + // fill the read buffer with zeroes for the holes + if(cur_lba != key.key()) { + if(blk_count > 0) { + // submit the read for the previous blkids + submit_read_to_backend(read_buf, futs, out_vector[start_idx], blk_count, vol, req->part_of_batch); + start_idx = i; + blk_count = 0; + } + auto fill_size = (key.key() - cur_lba) * vol->rd()->get_blk_size(); + std::memset(read_buf, 0, fill_size); + read_buf += fill_size; + cur_lba = key.key(); + } - VolumeIndexKey key(vol_req->lba); - VolumeIndexValue value; - homestore::BtreeSingleGetRequest get_req(&key, &value); - auto ret = vol_ptr->indx_table()->get(get_req); - RELEASE_ASSERT(ret == homestore::btree_status_t::success, "Cant find lba"); - auto err = 
vol_ptr->rd()->async_read(value.blkid(), data_sgs, data_size).get(); - RELEASE_ASSERT(!err, "async_read failed"); - return folly::Unit(); + // Contiguous blkids are merged into a single read request + bool is_contiguous = (i == 0 || value.blkid().blk_num() == out_vector[i-1].second.blkid().blk_num() + 1); + if(is_contiguous) { + blk_count++; + if(i < out_vector.size() - 1) { + continue; + } + } + // submit the read for the previous blkids + submit_read_to_backend(read_buf, futs, out_vector[start_idx], blk_count, vol, req->part_of_batch); + if(out_vector[start_idx].second.blkid().blk_num() + blk_count - 1 == value.blkid().blk_num()) { + // this is the last entry in the out vector + continue; + } + + // reset the blkids and size for the next read + blk_count = 0; + start_idx = i; + if(i == out_vector.size() - 1) { + submit_read_to_backend(read_buf, futs, out_vector[start_idx], 1, vol, req->part_of_batch); + } + } + // fill the holes at the end of the read buffer + if(cur_lba != req->end_lba() + 1) { + std::memset(read_buf, 0, (req->end_lba() - cur_lba + 1) * vol->rd()->get_blk_size()); + } + + return out_vector.empty() ? 
folly::Unit() : + folly::collectAllUnsafe(futs).thenValue([out_vector = std::move(out_vector), buf = req->buffer + , start_lba = req->lba, blk_size = vol->rd()->get_blk_size()](auto&& vf) -> VolumeManager::Result< folly::Unit > { + for (auto const& err_c : vf) { + if (sisl_unlikely(err_c.value())) { + auto ec = err_c.value(); + return folly::makeUnexpected(to_volume_error(ec)); + } + // verify the checksum + auto read_buf = buf; + for(uint64_t cur_lba = start_lba, i = 0; i < out_vector.size(); ++i, ++cur_lba) { + auto const& [key, value] = out_vector[i]; + // ignore the holes + if(cur_lba != key.key()) { + read_buf += (key.key() - cur_lba) * blk_size; + cur_lba = key.key(); + } + auto checksum = crc16_t10dif(init_crc_16, static_cast< unsigned char* >(read_buf), blk_size); + if(checksum != value.checksum()) { + LOGE("crc mismatch for lba: {}, blk id {}, expected: {}, actual: {}", cur_lba, value.blkid().to_string(), value.checksum(), checksum); + return folly::makeUnexpected(VolumeError::CRC_MISMATCH); + } + read_buf += blk_size; + } + } + return folly::Unit(); + }); } VolumeManager::NullAsyncResult HomeBlocksImpl::unmap(const VolumePtr& vol, const vol_interface_req_ptr& req) { From 0fd0b78f467b6eae9d39b69c9272af6fa2a25962 Mon Sep 17 00:00:00 2001 From: Ravi Nagarjun Akella Date: Fri, 16 May 2025 15:11:30 -0700 Subject: [PATCH 3/8] No need to set zeroes for lbas not in use. 
--- src/lib/volume/tests/test_volume_io.cpp | 2 -- src/lib/volume_mgr.cpp | 11 +++-------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/lib/volume/tests/test_volume_io.cpp b/src/lib/volume/tests/test_volume_io.cpp index bd8fd9c..aa7063e 100644 --- a/src/lib/volume/tests/test_volume_io.cpp +++ b/src/lib/volume/tests/test_volume_io.cpp @@ -203,8 +203,6 @@ class VolumeIOImpl { if(auto it = m_lba_data.find(lba); it != m_lba_data.end()) { data_pattern = it->second; test_common::HBTestHelper::validate_data_buf(buf, m_vol_ptr->info()->page_size, data_pattern); - } else { - test_common::HBTestHelper::validate_zeros(buf, m_vol_ptr->info()->page_size); } LOGDEBUG("Verify data lba={} pattern expected={} actual={}", lba, data_pattern, *r_cast< uint64_t* >(read_blob.bytes())); diff --git a/src/lib/volume_mgr.cpp b/src/lib/volume_mgr.cpp index 7e2537a..b7ed618 100644 --- a/src/lib/volume_mgr.cpp +++ b/src/lib/volume_mgr.cpp @@ -274,7 +274,7 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol, const for (uint32_t i = 0, blk_count = 0, start_idx = 0; i < out_vector.size(); ++i, ++cur_lba) { auto const& [key, value] = out_vector[i]; // cur_lba is used to keep track of the holes - // fill the read buffer with zeroes for the holes + // move the read buffer by the size of the holes if(cur_lba != key.key()) { if(blk_count > 0) { // submit the read for the previous blkids @@ -282,9 +282,8 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol, const start_idx = i; blk_count = 0; } - auto fill_size = (key.key() - cur_lba) * vol->rd()->get_blk_size(); - std::memset(read_buf, 0, fill_size); - read_buf += fill_size; + auto offset = (key.key() - cur_lba) * vol->rd()->get_blk_size(); + read_buf += offset; cur_lba = key.key(); } @@ -310,10 +309,6 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol, const submit_read_to_backend(read_buf, futs, out_vector[start_idx], 1, vol, req->part_of_batch); } } - // 
fill the holes at the end of the read buffer - if(cur_lba != req->end_lba() + 1) { - std::memset(read_buf, 0, (req->end_lba() - cur_lba + 1) * vol->rd()->get_blk_size()); - } return out_vector.empty() ? folly::Unit() : folly::collectAllUnsafe(futs).thenValue([out_vector = std::move(out_vector), buf = req->buffer From fa11f7c0a8b003256e1821bdaeb1d113074561e7 Mon Sep 17 00:00:00 2001 From: Ravi Nagarjun Akella Date: Mon, 19 May 2025 18:19:05 -0700 Subject: [PATCH 4/8] Add random read test --- src/lib/volume/tests/test_common.hpp | 6 --- src/lib/volume/tests/test_volume_io.cpp | 69 ++++++++++--------------- src/lib/volume_mgr.cpp | 35 +++++++------ 3 files changed, 46 insertions(+), 64 deletions(-) diff --git a/src/lib/volume/tests/test_common.hpp b/src/lib/volume/tests/test_common.hpp index 730c680..6a615d1 100644 --- a/src/lib/volume/tests/test_common.hpp +++ b/src/lib/volume/tests/test_common.hpp @@ -213,12 +213,6 @@ class HBTestHelper { } } - static void validate_zeros(uint8_t const* buf, uint64_t size) { - sisl::io_blob_safe blob(size, 512); - std::memset(blob.bytes(), 0, size); - RELEASE_ASSERT_EQ(std::memcmp(buf, blob.bytes(), size), 0, "data_buf mismatch"); - } - private: void init_devices(bool is_file, uint64_t dev_size = 0) { if (is_file) { diff --git a/src/lib/volume/tests/test_volume_io.cpp b/src/lib/volume/tests/test_volume_io.cpp index aa7063e..ea69d96 100644 --- a/src/lib/volume/tests/test_volume_io.cpp +++ b/src/lib/volume/tests/test_volume_io.cpp @@ -174,19 +174,6 @@ class VolumeIOImpl { }); } -<<<<<<< HEAD - void verify_all_data() { - for (auto& [lba, data_pattern] : m_lba_data) { - auto buffer = iomanager.iobuf_alloc(512, 4096); - vol_interface_req_ptr req(new vol_interface_req{buffer, lba, 1}); - - auto vol_mgr = g_helper->inst()->volume_manager(); - vol_mgr->read(m_vol_ptr, req).get(); - test_common::HBTestHelper::validate_data_buf(buffer, 4096, data_pattern); - LOGDEBUG("Verify data vol={} lba={} pattern={} {}", m_vol_name, lba, data_pattern, - 
*r_cast< uint64_t* >(buffer)); - iomanager.iobuf_free(buffer); -======= void read_and_verify(lba_t start_lba, uint32_t nlbas) { auto sz = nlbas * m_vol_ptr->info()->page_size; sisl::io_blob_safe read_blob(sz, 512); @@ -209,7 +196,7 @@ class VolumeIOImpl { } } - void verify_data(uint64_t nlbas_per_io = 1) { + void verify_all_data(uint64_t nlbas_per_io = 1) { auto start_lba = m_lba_data.begin()->first; auto max_lba = m_lba_data.rbegin()->first; verify_data(start_lba, max_lba, nlbas_per_io); @@ -221,7 +208,6 @@ class VolumeIOImpl { auto num_lbas_this_round = std::min(nlbas_per_io, max_lba - lba); read_and_verify(lba, num_lbas_this_round); num_lbas_verified += num_lbas_this_round; ->>>>>>> read API implementation } LOGINFO("Verified {} lbas for volume {}", num_lbas_verified, m_vol_ptr->info()->name); } @@ -283,21 +269,14 @@ class VolumeIOTest : public ::testing::Test { LOGINFO("IO completed"); } -<<<<<<< HEAD - void verify_all_data(shared< VolumeIOImpl > vol_impl = nullptr) { - if (vol_impl) { - vol_impl->verify_all_data(); -======= - void verify_data(shared< VolumeIOImpl > vol_impl = nullptr, uint64_t nlbas_per_io = 1) { + void verify_all_data(shared< VolumeIOImpl > vol_impl = nullptr, uint64_t nlbas_per_io = 1) { if (vol_impl) { - vol_impl->verify_data(nlbas_per_io); ->>>>>>> read API implementation + vol_impl->verify_all_data(nlbas_per_io); return; } for (auto& vol_impl : m_vols_impl) { -<<<<<<< HEAD - vol_impl->verify_all_data(); + vol_impl->verify_all_data(nlbas_per_io); } } @@ -305,14 +284,19 @@ class VolumeIOTest : public ::testing::Test { g_helper->restart(shutdown_delay); for (auto& vol_impl : m_vols_impl) { vol_impl->reset(); -======= - vol_impl->verify_data(nlbas_per_io); ->>>>>>> read API implementation } } std::vector< shared< VolumeIOImpl > >& volume_list() { return m_vols_impl; } + template < typename T > + T get_random_number(T min, T max) { + static std::random_device rd; + static std::mt19937 gen(rd()); + std::uniform_int_distribution< T > dis(min, 
max); + return dis(gen); + } + private: std::vector< shared< VolumeIOImpl > > m_vols_impl; }; @@ -333,34 +317,25 @@ TEST_F(VolumeIOTest, SingleVolumeWriteData) { restart(5); LOGINFO("Verify data"); -<<<<<<< HEAD verify_all_data(vol); -======= - verify_data(vol, 30 /* nlbas_per_io */); ->>>>>>> read API implementation + //verify_data(vol, 30 /* nlbas_per_io */); // Write and verify again on same LBA range to single volume multiple times. LOGINFO("Write and verify data with num_iter={} start={} nblks={}", num_iter, start_lba, nblks); for (uint32_t i = 0; i < num_iter; i++) { generate_io_single(vol, start_lba, nblks); } -<<<<<<< HEAD - verify_all_data(vol); -======= - verify_data(vol); - - // verify random lba ranges + verify_all_data(vol, 30 /* nlbas_per_io */); ->>>>>>> read API implementation LOGINFO("SingleVolumeWriteData test done."); } TEST_F(VolumeIOTest, SingleVolumeReadData) { // Write and verify fixed LBA range to single volume multiple times. auto vol = volume_list().back(); - uint32_t nblks = 500; - lba_t start_lba = 1000; + uint32_t nblks = 5000; + lba_t start_lba = 500; uint32_t num_iter = 1; LOGINFO("Write and verify data with num_iter={} start={} nblks={}", num_iter, start_lba, nblks); for (uint32_t i = 0; i < num_iter; i++) { @@ -371,7 +346,17 @@ TEST_F(VolumeIOTest, SingleVolumeReadData) { vol->verify_data(2000, 3000, 40); vol->verify_data(800, 1800, 40); - LOGINFO("SingleVolumeReadHoles test done."); + // random reads + num_iter = 100; + for(uint32_t i = 0; i < num_iter; i++) { + auto start_lba = get_random_number< lba_t >(0, 10000); + auto nblks = get_random_number< uint32_t >(1, 64); + auto no_lbas_per_io = get_random_number< uint64_t >(1, 50); + LOGINFO("iter {}: Read data start={} nblks={} no_lbas_per_io {}", i, start_lba, nblks, no_lbas_per_io); + vol->verify_data(start_lba, start_lba + nblks, no_lbas_per_io); + } + + LOGINFO("SingleVolumeRead test done."); } TEST_F(VolumeIOTest, MultipleVolumeWriteData) { diff --git a/src/lib/volume_mgr.cpp 
b/src/lib/volume_mgr.cpp index b7ed618..552adfd 100644 --- a/src/lib/volume_mgr.cpp +++ b/src/lib/volume_mgr.cpp @@ -259,26 +259,29 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol, const // TODO: check if the system is accepting ios (shutdown in progress etc) RELEASE_ASSERT(vol != nullptr, "VolumePtr is null"); // Step 1: get the blk ids from index table - std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > > out_vector; - if(auto index_resp = read_from_index(vol, req, out_vector); index_resp.hasError()) { + std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > > index_kvs; + if(auto index_resp = read_from_index(vol, req, index_kvs); index_resp.hasError()) { LOGE("Failed to read from index table for range=[{}, {}], volume id: {}, error: {}", req->lba, req->end_lba(), boost::uuids::to_string(vol->id()), index_resp.error()); return index_resp; } + if (index_kvs.empty()) { + return folly::Unit(); + } // Step 2: Consolidate the blk ids and issue read requests std::vector< folly::Future< std::error_code > > futs; auto* read_buf = req->buffer; DEBUG_ASSERT(read_buf != nullptr, "Read buffer is null"); auto cur_lba = req->lba; - for (uint32_t i = 0, blk_count = 0, start_idx = 0; i < out_vector.size(); ++i, ++cur_lba) { - auto const& [key, value] = out_vector[i]; + for (uint32_t i = 0, blk_count = 0, start_idx = 0; i < index_kvs.size(); ++i, ++cur_lba) { + auto const& [key, value] = index_kvs[i]; // cur_lba is used to keep track of the holes // move the read buffer by the size of the holes if(cur_lba != key.key()) { if(blk_count > 0) { // submit the read for the previous blkids - submit_read_to_backend(read_buf, futs, out_vector[start_idx], blk_count, vol, req->part_of_batch); + submit_read_to_backend(read_buf, futs, index_kvs[start_idx], blk_count, vol, req->part_of_batch); start_idx = i; blk_count = 0; } @@ -288,30 +291,30 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol, const } // Contiguous blkids are 
merged into a single read request - bool is_contiguous = (i == 0 || value.blkid().blk_num() == out_vector[i-1].second.blkid().blk_num() + 1); + bool is_contiguous = (i == 0 || (value.blkid().blk_num() == index_kvs[i-1].second.blkid().blk_num() + 1 + && value.blkid().chunk_num() == index_kvs[i-1].second.blkid().chunk_num())); if(is_contiguous) { blk_count++; - if(i < out_vector.size() - 1) { + if(i < index_kvs.size() - 1) { continue; } } // submit the read for the previous blkids - submit_read_to_backend(read_buf, futs, out_vector[start_idx], blk_count, vol, req->part_of_batch); - if(out_vector[start_idx].second.blkid().blk_num() + blk_count - 1 == value.blkid().blk_num()) { - // this is the last entry in the out vector + submit_read_to_backend(read_buf, futs, index_kvs[start_idx], blk_count, vol, req->part_of_batch); + if(index_kvs[start_idx].second.blkid().blk_num() + blk_count - 1 == value.blkid().blk_num()) { + // this is the last entry in the index_kvs continue; } // reset the blkids and size for the next read blk_count = 0; start_idx = i; - if(i == out_vector.size() - 1) { - submit_read_to_backend(read_buf, futs, out_vector[start_idx], 1, vol, req->part_of_batch); + if(i == index_kvs.size() - 1) { + submit_read_to_backend(read_buf, futs, index_kvs[start_idx], 1, vol, req->part_of_batch); } } - return out_vector.empty() ? 
folly::Unit() : - folly::collectAllUnsafe(futs).thenValue([out_vector = std::move(out_vector), buf = req->buffer + return folly::collectAllUnsafe(futs).thenValue([index_kvs = std::move(index_kvs), buf = req->buffer , start_lba = req->lba, blk_size = vol->rd()->get_blk_size()](auto&& vf) -> VolumeManager::Result< folly::Unit > { for (auto const& err_c : vf) { if (sisl_unlikely(err_c.value())) { @@ -320,8 +323,8 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol, const } // verify the checksum auto read_buf = buf; - for(uint64_t cur_lba = start_lba, i = 0; i < out_vector.size(); ++i, ++cur_lba) { - auto const& [key, value] = out_vector[i]; + for(uint64_t cur_lba = start_lba, i = 0; i < index_kvs.size(); ++i, ++cur_lba) { + auto const& [key, value] = index_kvs[i]; // ignore the holes if(cur_lba != key.key()) { read_buf += (key.key() - cur_lba) * blk_size; From ae979a9d260974fc88b21d4676305de5402c7c63 Mon Sep 17 00:00:00 2001 From: Ravi Nagarjun Akella Date: Tue, 20 May 2025 11:56:08 -0700 Subject: [PATCH 5/8] move verify_checksum during read to a standalone function --- src/lib/homeblks_impl.hpp | 3 +++ src/lib/volume_mgr.cpp | 52 ++++++++++++++++++++++++--------------- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/src/lib/homeblks_impl.hpp b/src/lib/homeblks_impl.hpp index aac9602..b420c40 100644 --- a/src/lib/homeblks_impl.hpp +++ b/src/lib/homeblks_impl.hpp @@ -133,6 +133,9 @@ class HomeBlocksImpl : public HomeBlocks, public VolumeManager, public std::enab void on_write(int64_t lsn, const sisl::blob& header, const sisl::blob& key, const std::vector< homestore::MultiBlkId >& blkids, cintrusive< homestore::repl_req_ctx >& ctx); + VolumeManager::Result< folly::Unit > verify_checksum(std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > >const& index_kvs, + uint8_t* buf, lba_t start_lba, uint32_t blk_size); + private: // Should only be called for first-time-boot void superblk_init(); diff --git a/src/lib/volume_mgr.cpp 
b/src/lib/volume_mgr.cpp index 552adfd..2077b43 100644 --- a/src/lib/volume_mgr.cpp +++ b/src/lib/volume_mgr.cpp @@ -255,11 +255,19 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::write(const VolumePtr& vol_ptr, c }); } +struct vol_mgr_read_ctx { + uint8_t* buf; + lba_t start_lba; + uint32_t blk_sz; + std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > > index_kvs{}; +}; + VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol, const vol_interface_req_ptr& req) { // TODO: check if the system is accepting ios (shutdown in progress etc) RELEASE_ASSERT(vol != nullptr, "VolumePtr is null"); // Step 1: get the blk ids from index table - std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > > index_kvs; + vol_mgr_read_ctx read_ctx{.buf = req->buffer, .start_lba = req->lba, .blk_sz = vol->rd()->get_blk_size()}; + auto& index_kvs = read_ctx.index_kvs; if(auto index_resp = read_from_index(vol, req, index_kvs); index_resp.hasError()) { LOGE("Failed to read from index table for range=[{}, {}], volume id: {}, error: {}", req->lba, req->end_lba(), boost::uuids::to_string(vol->id()), index_resp.error()); @@ -314,34 +322,38 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol, const } } - return folly::collectAllUnsafe(futs).thenValue([index_kvs = std::move(index_kvs), buf = req->buffer - , start_lba = req->lba, blk_size = vol->rd()->get_blk_size()](auto&& vf) -> VolumeManager::Result< folly::Unit > { + return folly::collectAllUnsafe(futs).thenValue([this, read_ctx = std::move(read_ctx)](auto&& vf) -> VolumeManager::Result< folly::Unit > { for (auto const& err_c : vf) { if (sisl_unlikely(err_c.value())) { auto ec = err_c.value(); return folly::makeUnexpected(to_volume_error(ec)); } - // verify the checksum - auto read_buf = buf; - for(uint64_t cur_lba = start_lba, i = 0; i < index_kvs.size(); ++i, ++cur_lba) { - auto const& [key, value] = index_kvs[i]; - // ignore the holes - if(cur_lba != key.key()) { - read_buf += 
(key.key() - cur_lba) * blk_size; - cur_lba = key.key(); - } - auto checksum = crc16_t10dif(init_crc_16, static_cast< unsigned char* >(read_buf), blk_size); - if(checksum != value.checksum()) { - LOGE("crc mismatch for lba: {}, blk id {}, expected: {}, actual: {}", cur_lba, value.blkid().to_string(), value.checksum(), checksum); - return folly::makeUnexpected(VolumeError::CRC_MISMATCH); - } - read_buf += blk_size; - } } - return folly::Unit(); + // verify the checksum and return + return verify_checksum(read_ctx.index_kvs, read_ctx.buf, read_ctx.start_lba, read_ctx.blk_sz); }); } +VolumeManager::Result< folly::Unit > HomeBlocksImpl::verify_checksum(std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > >const& index_kvs, + uint8_t* buf, lba_t start_lba, uint32_t blk_size) { + auto read_buf = buf; + for(uint64_t cur_lba = start_lba, i = 0; i < index_kvs.size(); ++i, ++cur_lba) { + auto const& [key, value] = index_kvs[i]; + // ignore the holes + if(cur_lba != key.key()) { + read_buf += (key.key() - cur_lba) * blk_size; + cur_lba = key.key(); + } + auto checksum = crc16_t10dif(init_crc_16, static_cast< unsigned char* >(read_buf), blk_size); + if(checksum != value.checksum()) { + LOGE("crc mismatch for lba: {}, blk id {}, expected: {}, actual: {}", cur_lba, value.blkid().to_string(), value.checksum(), checksum); + return folly::makeUnexpected(VolumeError::CRC_MISMATCH); + } + read_buf += blk_size; + } + return folly::Unit(); +} + VolumeManager::NullAsyncResult HomeBlocksImpl::unmap(const VolumePtr& vol, const vol_interface_req_ptr& req) { RELEASE_ASSERT(false, "Unmap Not implemented"); return folly::Unit(); From dc1417caead32d9bdca3dcb15986bfb8e4bf2034 Mon Sep 17 00:00:00 2001 From: Ravi Nagarjun Akella Date: Wed, 21 May 2025 14:09:16 -0700 Subject: [PATCH 6/8] simplify the read API --- conanfile.py | 2 +- src/lib/homeblks_impl.hpp | 18 ++++- src/lib/index.cpp | 4 +- src/lib/volume_mgr.cpp | 136 +++++++++++++++++--------------------- 4 files changed, 78 
insertions(+), 82 deletions(-) diff --git a/conanfile.py b/conanfile.py index deef21b..fa43a5c 100644 --- a/conanfile.py +++ b/conanfile.py @@ -9,7 +9,7 @@ class HomeBlocksConan(ConanFile): name = "homeblocks" - version = "1.0.18" + version = "1.0.19" homepage = "https://github.com/eBay/HomeBlocks" description = "Block Store built on HomeStore" topics = ("ebay") diff --git a/src/lib/homeblks_impl.hpp b/src/lib/homeblks_impl.hpp index b420c40..d5b2e74 100644 --- a/src/lib/homeblks_impl.hpp +++ b/src/lib/homeblks_impl.hpp @@ -40,6 +40,16 @@ struct VolJournalEntry { uint16_t num_old_blks; }; +using index_kv_list_t = std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > >; +using read_blks_list_t = std::vector< std::pair< lba_t, homestore::MultiBlkId > >; + +struct vol_mgr_read_ctx { + uint8_t* buf; + lba_t start_lba; + uint32_t blk_size; + index_kv_list_t index_kvs{}; +}; + class HomeBlocksImpl : public HomeBlocks, public VolumeManager, public std::enable_shared_from_this< HomeBlocksImpl > { struct homeblks_sb_t { uint64_t magic; @@ -133,8 +143,7 @@ class HomeBlocksImpl : public HomeBlocks, public VolumeManager, public std::enab void on_write(int64_t lsn, const sisl::blob& header, const sisl::blob& key, const std::vector< homestore::MultiBlkId >& blkids, cintrusive< homestore::repl_req_ctx >& ctx); - VolumeManager::Result< folly::Unit > verify_checksum(std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > >const& index_kvs, - uint8_t* buf, lba_t start_lba, uint32_t blk_size); + VolumeManager::Result< folly::Unit > verify_checksum(vol_mgr_read_ctx const& read_ctx); private: // Should only be called for first-time-boot @@ -154,7 +163,10 @@ class HomeBlocksImpl : public HomeBlocks, public VolumeManager, public std::enab VolumeManager::Result< folly::Unit > write_to_index(const VolumePtr& vol_ptr, lba_t start_lba, lba_t end_lba, std::unordered_map< lba_t, BlockInfo >& blocks_info); VolumeManager::Result< folly::Unit > read_from_index(const VolumePtr& vol_ptr, 
const vol_interface_req_ptr& req, - std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > >& out_vector); + index_kv_list_t& index_kvs); + void generate_blkids_to_read(const index_kv_list_t& index_kvs, read_blks_list_t& blks_to_read); + void submit_read_to_backend(read_blks_list_t const& blks_to_read, const vol_interface_req_ptr& req, + const VolumePtr& vol, std::vector< folly::Future< std::error_code > >& futs); }; class HBIndexSvcCB : public homestore::IndexServiceCallbacks { diff --git a/src/lib/index.cpp b/src/lib/index.cpp index 459cb5d..4884614 100644 --- a/src/lib/index.cpp +++ b/src/lib/index.cpp @@ -52,12 +52,12 @@ HomeBlocksImpl::write_to_index(const VolumePtr& vol_ptr, lba_t start_lba, lba_t } VolumeManager::Result< folly::Unit > HomeBlocksImpl::read_from_index(const VolumePtr& vol_ptr, const vol_interface_req_ptr& req, - std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > >& out_vector) { + index_kv_list_t& index_kvs) { homestore::BtreeQueryRequest< VolumeIndexKey > qreq{homestore::BtreeKeyRange< VolumeIndexKey >{VolumeIndexKey{req->lba}, VolumeIndexKey{req->end_lba()}}, homestore::BtreeQueryType::SWEEP_NON_INTRUSIVE_PAGINATION_QUERY}; auto index_table = vol_ptr->indx_table(); RELEASE_ASSERT(index_table != nullptr, "Index table is null for volume id: {}", boost::uuids::to_string(vol_ptr->id())); - if (auto ret = index_table->query(qreq, out_vector); ret != homestore::btree_status_t::success) { + if (auto ret = index_table->query(qreq, index_kvs); ret != homestore::btree_status_t::success) { return folly::makeUnexpected(VolumeError::INDEX_ERROR); } return folly::Unit(); diff --git a/src/lib/volume_mgr.cpp b/src/lib/volume_mgr.cpp index 2077b43..8e95224 100644 --- a/src/lib/volume_mgr.cpp +++ b/src/lib/volume_mgr.cpp @@ -28,21 +28,6 @@ static VolumeError to_volume_error(std::error_code ec) { } } -static void submit_read_to_backend(uint8_t*& read_buf, std::vector< folly::Future< std::error_code > >& futs, - const std::pair< VolumeIndexKey, 
VolumeIndexValue >& first_blk_in_contiguous_range, uint32_t blk_count, - const VolumePtr& vol, bool part_of_batch) { - // construct the blkid - auto blk_num = first_blk_in_contiguous_range.second.blkid().blk_num(); - auto chunk_num = first_blk_in_contiguous_range.second.blkid().chunk_num(); - auto blkid = homestore::MultiBlkId(blk_num, blk_count, chunk_num); - sisl::sg_list sgs; - auto size = blk_count * vol->rd()->get_blk_size(); - sgs.size = size; - sgs.iovs.emplace_back(iovec{.iov_base = read_buf, .iov_len = size}); - read_buf += size; - futs.emplace_back(vol->rd()->async_read(blkid, sgs, size, part_of_batch)); -} - std::shared_ptr< VolumeManager > HomeBlocksImpl::volume_manager() { return shared_from_this(); } void HomeBlocksImpl::on_vol_meta_blk_found(sisl::byte_view const& buf, void* cookie) { @@ -255,73 +240,73 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::write(const VolumePtr& vol_ptr, c }); } -struct vol_mgr_read_ctx { - uint8_t* buf; - lba_t start_lba; - uint32_t blk_sz; - std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > > index_kvs{}; -}; +void HomeBlocksImpl::generate_blkids_to_read(const index_kv_list_t& index_kvs, read_blks_list_t& blks_to_read) { + for(uint32_t i = 0, start_idx = 0; i < index_kvs.size(); ++i) { + auto const& [key, value] = index_kvs[i]; + bool is_contiguous = (i == 0 || (value.blkid().blk_num() == index_kvs[i-1].second.blkid().blk_num() + 1 + && value.blkid().chunk_num() == index_kvs[i-1].second.blkid().chunk_num())); + if(is_contiguous && i < index_kvs.size() - 1) { + // continue to the next entry if it is contiguous + continue; + } + // prepare the previous contiguous blkids to read + auto blk_num = index_kvs[start_idx].second.blkid().blk_num(); + auto chunk_num = index_kvs[start_idx].second.blkid().chunk_num(); + // if the last entry is part of the contiguous block, + // we need to account for it in the blk_count + auto blk_count = is_contiguous ? 
(i - start_idx + 1) : (i - start_idx); + blks_to_read.emplace_back(index_kvs[start_idx].first.key(), homestore::MultiBlkId(blk_num, blk_count, chunk_num)); + start_idx = i; + if(!is_contiguous && i == index_kvs.size() - 1) { + // if the last entry is not contiguous, we need to add it as well + blks_to_read.emplace_back(key.key(), homestore::MultiBlkId(value.blkid().blk_num(), 1, value.blkid().chunk_num())); + } + } +} + +void HomeBlocksImpl::submit_read_to_backend(read_blks_list_t const& blks_to_read, const vol_interface_req_ptr& req, + const VolumePtr& vol, std::vector< folly::Future< std::error_code > >& futs) { + auto* read_buf = req->buffer; + DEBUG_ASSERT(read_buf != nullptr, "Read buffer is null"); + for(uint32_t i = 0, prev_lba = req->lba, prev_nblks = 0; i < blks_to_read.size(); ++i) { + auto const& [start_lba, blkids] = blks_to_read[i]; + DEBUG_ASSERT(start_lba >= prev_lba + prev_nblks, "Invalid start lba: {}, prev_lba: {}, prev_nblks: {}", + start_lba, prev_lba, prev_nblks); + auto holes_nblks = start_lba - (prev_lba + prev_nblks); + read_buf += (holes_nblks * vol->rd()->get_blk_size()); + sisl::sg_list sgs; + sgs.size = blkids.blk_count() * vol->rd()->get_blk_size(); + sgs.iovs.emplace_back(iovec{.iov_base = read_buf, .iov_len = sgs.size}); + read_buf += sgs.size; + futs.emplace_back(vol->rd()->async_read(blkids, sgs, sgs.size, req->part_of_batch)); + prev_lba = start_lba; + prev_nblks = blkids.blk_count(); + } +} VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol, const vol_interface_req_ptr& req) { // TODO: check if the system is accepting ios (shutdown in progress etc) RELEASE_ASSERT(vol != nullptr, "VolumePtr is null"); // Step 1: get the blk ids from index table - vol_mgr_read_ctx read_ctx{.buf = req->buffer, .start_lba = req->lba, .blk_sz = vol->rd()->get_blk_size()}; - auto& index_kvs = read_ctx.index_kvs; - if(auto index_resp = read_from_index(vol, req, index_kvs); index_resp.hasError()) { + vol_mgr_read_ctx read_ctx{.buf = 
req->buffer, .start_lba = req->lba, .blk_size = vol->rd()->get_blk_size()}; + if(auto index_resp = read_from_index(vol, req, read_ctx.index_kvs); index_resp.hasError()) { LOGE("Failed to read from index table for range=[{}, {}], volume id: {}, error: {}", req->lba, req->end_lba(), boost::uuids::to_string(vol->id()), index_resp.error()); return index_resp; } - if (index_kvs.empty()) { + if (read_ctx.index_kvs.empty()) { return folly::Unit(); } - // Step 2: Consolidate the blk ids and issue read requests + // Step 2: Consolidate the blocks by merging the contiguous blkids std::vector< folly::Future< std::error_code > > futs; - auto* read_buf = req->buffer; - DEBUG_ASSERT(read_buf != nullptr, "Read buffer is null"); - auto cur_lba = req->lba; - for (uint32_t i = 0, blk_count = 0, start_idx = 0; i < index_kvs.size(); ++i, ++cur_lba) { - auto const& [key, value] = index_kvs[i]; - // cur_lba is used to keep track of the holes - // move the read buffer by the size of the holes - if(cur_lba != key.key()) { - if(blk_count > 0) { - // submit the read for the previous blkids - submit_read_to_backend(read_buf, futs, index_kvs[start_idx], blk_count, vol, req->part_of_batch); - start_idx = i; - blk_count = 0; - } - auto offset = (key.key() - cur_lba) * vol->rd()->get_blk_size(); - read_buf += offset; - cur_lba = key.key(); - } - - // Contiguous blkids are merged into a single read request - bool is_contiguous = (i == 0 || (value.blkid().blk_num() == index_kvs[i-1].second.blkid().blk_num() + 1 - && value.blkid().chunk_num() == index_kvs[i-1].second.blkid().chunk_num())); - if(is_contiguous) { - blk_count++; - if(i < index_kvs.size() - 1) { - continue; - } - } - // submit the read for the previous blkids - submit_read_to_backend(read_buf, futs, index_kvs[start_idx], blk_count, vol, req->part_of_batch); - if(index_kvs[start_idx].second.blkid().blk_num() + blk_count - 1 == value.blkid().blk_num()) { - // this is the last entry in the index_kvs - continue; - } + read_blks_list_t 
blks_to_read; + generate_blkids_to_read(read_ctx.index_kvs, blks_to_read); - // reset the blkids and size for the next read - blk_count = 0; - start_idx = i; - if(i == index_kvs.size() - 1) { - submit_read_to_backend(read_buf, futs, index_kvs[start_idx], 1, vol, req->part_of_batch); - } - } + // Step 3: Submit the read requests to backend + submit_read_to_backend(blks_to_read, req, vol, futs); + // Step 4: verify the checksum after all the reads are done return folly::collectAllUnsafe(futs).thenValue([this, read_ctx = std::move(read_ctx)](auto&& vf) -> VolumeManager::Result< folly::Unit > { for (auto const& err_c : vf) { if (sisl_unlikely(err_c.value())) { @@ -330,26 +315,25 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol, const } } // verify the checksum and return - return verify_checksum(read_ctx.index_kvs, read_ctx.buf, read_ctx.start_lba, read_ctx.blk_sz); + return verify_checksum(read_ctx); }); } -VolumeManager::Result< folly::Unit > HomeBlocksImpl::verify_checksum(std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > >const& index_kvs, - uint8_t* buf, lba_t start_lba, uint32_t blk_size) { - auto read_buf = buf; - for(uint64_t cur_lba = start_lba, i = 0; i < index_kvs.size(); ++i, ++cur_lba) { - auto const& [key, value] = index_kvs[i]; +VolumeManager::Result< folly::Unit > HomeBlocksImpl::verify_checksum(vol_mgr_read_ctx const& read_ctx) { + auto read_buf = read_ctx.buf; + for(uint64_t cur_lba = read_ctx.start_lba, i = 0; i < read_ctx.index_kvs.size(); ++i, ++cur_lba) { + auto const& [key, value] = read_ctx.index_kvs[i]; // ignore the holes if(cur_lba != key.key()) { - read_buf += (key.key() - cur_lba) * blk_size; + read_buf += (key.key() - cur_lba) * read_ctx.blk_size; cur_lba = key.key(); } - auto checksum = crc16_t10dif(init_crc_16, static_cast< unsigned char* >(read_buf), blk_size); + auto checksum = crc16_t10dif(init_crc_16, static_cast< unsigned char* >(read_buf), read_ctx.blk_size); if(checksum != value.checksum()) { 
LOGE("crc mismatch for lba: {}, blk id {}, expected: {}, actual: {}", cur_lba, value.blkid().to_string(), value.checksum(), checksum); return folly::makeUnexpected(VolumeError::CRC_MISMATCH); } - read_buf += blk_size; + read_buf += read_ctx.blk_size; } return folly::Unit(); } From 3cfc9de7378d2aaae06604f5726eb7b8fda639d0 Mon Sep 17 00:00:00 2001 From: Ravi Nagarjun Akella Date: Wed, 21 May 2025 17:19:52 -0700 Subject: [PATCH 7/8] rename vol_mgr_read_ctx --- src/lib/homeblks_impl.hpp | 4 ++-- src/lib/volume_mgr.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib/homeblks_impl.hpp b/src/lib/homeblks_impl.hpp index d5b2e74..eecd77b 100644 --- a/src/lib/homeblks_impl.hpp +++ b/src/lib/homeblks_impl.hpp @@ -43,7 +43,7 @@ struct VolJournalEntry { using index_kv_list_t = std::vector< std::pair< VolumeIndexKey, VolumeIndexValue > >; using read_blks_list_t = std::vector< std::pair< lba_t, homestore::MultiBlkId > >; -struct vol_mgr_read_ctx { +struct vol_read_ctx { uint8_t* buf; lba_t start_lba; uint32_t blk_size; @@ -143,7 +143,7 @@ class HomeBlocksImpl : public HomeBlocks, public VolumeManager, public std::enab void on_write(int64_t lsn, const sisl::blob& header, const sisl::blob& key, const std::vector< homestore::MultiBlkId >& blkids, cintrusive< homestore::repl_req_ctx >& ctx); - VolumeManager::Result< folly::Unit > verify_checksum(vol_mgr_read_ctx const& read_ctx); + VolumeManager::Result< folly::Unit > verify_checksum(vol_read_ctx const& read_ctx); private: // Should only be called for first-time-boot diff --git a/src/lib/volume_mgr.cpp b/src/lib/volume_mgr.cpp index 8e95224..0507718 100644 --- a/src/lib/volume_mgr.cpp +++ b/src/lib/volume_mgr.cpp @@ -288,7 +288,7 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol, const // TODO: check if the system is accepting ios (shutdown in progress etc) RELEASE_ASSERT(vol != nullptr, "VolumePtr is null"); // Step 1: get the blk ids from index table - vol_mgr_read_ctx 
read_ctx{.buf = req->buffer, .start_lba = req->lba, .blk_size = vol->rd()->get_blk_size()}; + vol_read_ctx read_ctx{.buf = req->buffer, .start_lba = req->lba, .blk_size = vol->rd()->get_blk_size()}; if(auto index_resp = read_from_index(vol, req, read_ctx.index_kvs); index_resp.hasError()) { LOGE("Failed to read from index table for range=[{}, {}], volume id: {}, error: {}", req->lba, req->end_lba(), boost::uuids::to_string(vol->id()), index_resp.error()); @@ -319,7 +319,7 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol, const }); } -VolumeManager::Result< folly::Unit > HomeBlocksImpl::verify_checksum(vol_mgr_read_ctx const& read_ctx) { +VolumeManager::Result< folly::Unit > HomeBlocksImpl::verify_checksum(vol_read_ctx const& read_ctx) { auto read_buf = read_ctx.buf; for(uint64_t cur_lba = read_ctx.start_lba, i = 0; i < read_ctx.index_kvs.size(); ++i, ++cur_lba) { auto const& [key, value] = read_ctx.index_kvs[i]; From 270aafb8e4c4abf9b6054bad61fc61325974adf5 Mon Sep 17 00:00:00 2001 From: Ravi Nagarjun Akella Date: Thu, 22 May 2025 08:48:10 -0700 Subject: [PATCH 8/8] add unit test to read with holes --- src/lib/volume/tests/test_volume_io.cpp | 29 +++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/lib/volume/tests/test_volume_io.cpp b/src/lib/volume/tests/test_volume_io.cpp index ea69d96..905e617 100644 --- a/src/lib/volume/tests/test_volume_io.cpp +++ b/src/lib/volume/tests/test_volume_io.cpp @@ -359,6 +359,35 @@ TEST_F(VolumeIOTest, SingleVolumeReadData) { LOGINFO("SingleVolumeRead test done."); } +TEST_F(VolumeIOTest, SingleVolumeReadHoles) { + auto vol = volume_list().back(); + uint32_t nblks = 5000; + lba_t start_lba = 500; + generate_io_single(vol, start_lba, nblks); + + // Verify with no holes in the range + vol->verify_data(1000, 2000, 40); + + start_lba = 10000; + nblks = 50; + for(uint32_t i = 0; i/2 < nblks; i+=2) { + generate_io_single(vol, start_lba+i, 1); + } + + // Verify with hole after each lba + 
vol->verify_data(10000, 10100, 50); + + start_lba = 20000; + for(uint32_t i = 0; i < 100; i++) { + if(i%7 > 2) { + generate_io_single(vol, start_lba+i, 1); + } + } + // Verify with mixed holes in the range + vol->verify_data(20000, 20100, 50); + +} + TEST_F(VolumeIOTest, MultipleVolumeWriteData) { LOGINFO("Write data randomly on num_vols={} num_io={}", SISL_OPTIONS["num_vols"].as< uint32_t >(), SISL_OPTIONS["num_io"].as< uint64_t >());