From b1f675f4f078763c2f0631838083036c130ff38f Mon Sep 17 00:00:00 2001 From: "yuchen.cc" Date: Thu, 24 Aug 2023 17:08:20 +0800 Subject: [PATCH 01/31] enhance tar/zfile checking Signed-off-by: yuchen.cc --- src/overlaybd/lsmt/file.cpp | 3 ++- src/overlaybd/registryfs/registryfs.cpp | 2 +- src/overlaybd/tar/tar_file.cpp | 24 ++++++++++++++++-------- src/overlaybd/zfile/zfile.cpp | 3 +++ src/switch_file.cpp | 6 ++++++ src/tools/overlaybd-zfile.cpp | 2 +- 6 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/overlaybd/lsmt/file.cpp b/src/overlaybd/lsmt/file.cpp index d75ff27f..12ec4e16 100644 --- a/src/overlaybd/lsmt/file.cpp +++ b/src/overlaybd/lsmt/file.cpp @@ -1528,7 +1528,7 @@ void *do_parallel_load_index(void *param) { return nullptr; } auto file = job->get_file(); - LOG_INFO("check file if normalfile or LSMTFile"); + LOG_INFO("check `-th file is normal file or LSMT file", job->i); IMemoryIndex *pi = nullptr; LSMT::SegmentMapping *p = nullptr; auto type = file->ioctl(IFileRO::GetType); @@ -1560,6 +1560,7 @@ void *do_parallel_load_index(void *param) { LOG_ERROR_RETURN(0, nullptr, "failed to create memory index!"); } job->set_index(pi); + LOG_INFO("load index from `-th file done", job->i); } return NULL; } diff --git a/src/overlaybd/registryfs/registryfs.cpp b/src/overlaybd/registryfs/registryfs.cpp index 98ac667f..d19238f2 100644 --- a/src/overlaybd/registryfs/registryfs.cpp +++ b/src/overlaybd/registryfs/registryfs.cpp @@ -425,7 +425,7 @@ class RegistryFileImpl : public photon::fs::VirtualReadOnlyFile { LOG_WARN("failed to perform HTTP GET, going to retry ", VALUE(code), VALUE(offset), VALUE(count), VALUE(ret_len), eno); - photon::thread_usleep(1000); + photon::thread_usleep(10000); goto again; } else { LOG_ERROR_RETURN(ENOENT, -1, "failed to perform HTTP GET ", VALUE(m_url), diff --git a/src/overlaybd/tar/tar_file.cpp b/src/overlaybd/tar/tar_file.cpp index fa05b03b..ee520d2d 100644 --- a/src/overlaybd/tar/tar_file.cpp +++ 
b/src/overlaybd/tar/tar_file.cpp @@ -301,20 +301,23 @@ class TarFs : public ForwardFS_Ownership { int is_tar_file(IFile *file) { TarHeader th_buf; - if (file->pread(&th_buf, T_BLOCKSIZE, 0) != T_BLOCKSIZE) { - LOG_DEBUG("error read tar file header"); + auto ret = file->pread(&th_buf, T_BLOCKSIZE, 0); + if (ret < 0) { + LOG_ERROR_RETURN(0, -1, "read tar file header failed"); + } else if (ret != T_BLOCKSIZE) { + LOG_WARN("read tar file header error, expect `, ret `", T_BLOCKSIZE, ret); return 0; } if (strncmp(th_buf.magic, TMAGIC, TMAGLEN - 1) != 0) { - LOG_DEBUG("unknown magic value in tar header"); + LOG_INFO("unknown magic value in tar header"); return 0; } if (strncmp(th_buf.version, TVERSION, TVERSLEN) != 0) { - LOG_DEBUG("unknown version value in tar header"); + LOG_INFO("unknown version value in tar header"); return 0; } if (!th_buf.crc_ok()) { - LOG_DEBUG("tar header checksum error"); + LOG_INFO("tar header checksum error"); return 0; } return 1; @@ -329,11 +332,16 @@ IFile *new_tar_file(IFile *file, bool create) { } IFile *open_tar_file(IFile *file) { - if (is_tar_file(file) == 1) { + auto ret = is_tar_file(file); + if (ret == 1) { + LOG_INFO("open file as tar file"); return new_tar_file(file); + } else if (ret == 0) { + LOG_INFO("open file as normal file"); + return file; + } else { + LOG_ERROR_RETURN(0, nullptr, "open tar file failed"); } - LOG_DEBUG("not tar file, open as normal file"); - return file; // open as normal file } IFileSystem *new_tar_fs_adaptor(IFileSystem *fs) { diff --git a/src/overlaybd/zfile/zfile.cpp b/src/overlaybd/zfile/zfile.cpp index c4f2e6aa..d75e7036 100644 --- a/src/overlaybd/zfile/zfile.cpp +++ b/src/overlaybd/zfile/zfile.cpp @@ -938,6 +938,9 @@ int zfile_validation_check(IFile *src) { } int is_zfile(IFile *file) { + if (!file) { + LOG_ERROR_RETURN(0, -1, "file is nullptr."); + } char buf[CompressionFile::HeaderTrailer::SPACE]; auto ret = file->pread(buf, CompressionFile::HeaderTrailer::SPACE, 0); if (ret < 
(ssize_t)CompressionFile::HeaderTrailer::SPACE) diff --git a/src/switch_file.cpp b/src/switch_file.cpp index 166b4280..57815c9d 100644 --- a/src/switch_file.cpp +++ b/src/switch_file.cpp @@ -49,8 +49,10 @@ static IFile *try_open_zfile(IFile *file, bool verify, const char *file_path) { LOG_ERROR_RETURN(0, nullptr, "zfile_open_ro failed, path: `: error: `(`)", file_path, errno, strerror(errno)); } + LOG_INFO("open file as zfile"); return zf; } + LOG_INFO("file is not zfile format"); return file; } @@ -83,6 +85,10 @@ class SwitchFile : public ISwitchFile { } file = try_open_zfile(new_tar_file_adaptor(file), false, m_filepath.c_str()); + if (file == nullptr) { + LOG_ERROR("failed to open commit file as zfile, path: `", m_filepath); + return; + } LOG_INFO("switch to localfile '`' success.", m_filepath); m_local_file = file; } diff --git a/src/tools/overlaybd-zfile.cpp b/src/tools/overlaybd-zfile.cpp index 0aed2383..e8fb5170 100644 --- a/src/tools/overlaybd-zfile.cpp +++ b/src/tools/overlaybd-zfile.cpp @@ -41,7 +41,7 @@ IFileSystem *lfs = nullptr; int verify_crc(IFile* src_file) { - if (!is_zfile(src_file)) { + if (is_zfile(src_file) != 1) { fprintf(stderr, "format error! 
should be a zfile.\n"); exit(-1); } From 712b0eaf2a2ba0be24d5556a2e75e7602820b4bb Mon Sep 17 00:00:00 2001 From: liulanzheng Date: Thu, 24 Aug 2023 17:51:41 +0800 Subject: [PATCH 02/31] refactor io sure Signed-off-by: liulanzheng --- src/CMakeLists.txt | 1 - src/image_file.cpp | 19 ++----- src/main.cpp | 26 ++++++++- src/sure_file.cpp | 131 --------------------------------------------- src/sure_file.h | 24 --------- 5 files changed, 30 insertions(+), 171 deletions(-) delete mode 100644 src/sure_file.cpp delete mode 100644 src/sure_file.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 94e9df64..3c8372e4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -10,7 +10,6 @@ add_subdirectory(overlaybd) add_library(overlaybd_image_lib image_file.cpp image_service.cpp - sure_file.cpp switch_file.cpp bk_download.cpp prefetch.cpp diff --git a/src/image_file.cpp b/src/image_file.cpp index f15627dc..7a1b5f25 100644 --- a/src/image_file.cpp +++ b/src/image_file.cpp @@ -31,7 +31,6 @@ #include "overlaybd/zfile/zfile.h" #include "config.h" #include "image_file.h" -#include "sure_file.h" #include "switch_file.h" #include "overlaybd/gzip/gz.h" #include "overlaybd/gzindex/gzfile.h" @@ -168,13 +167,6 @@ IFile *ImageFile::__open_ro_remote(const std::string &dir, const std::string &di LOG_ERROR_RETURN(0, nullptr, "failed to open switch file `", url); } - IFile *sure_file = new_sure_file(switch_file, this); - if (!sure_file) { - set_failed("failed to open sure file `" + url); - delete switch_file; - LOG_ERROR_RETURN(0, nullptr, "failed to open sure file `", url); - } - if (conf.HasMember("download") && conf.download().enable() == 1) { // download from registry, verify sha256 after downloaded. 
IFile *srcfile = image_service.global_fs.srcfs->open(url.c_str(), O_RDONLY); @@ -189,7 +181,7 @@ IFile *ImageFile::__open_ro_remote(const std::string &dir, const std::string &di } } - return sure_file; + return switch_file; } void ImageFile::start_bk_dl_thread() { @@ -370,14 +362,13 @@ LSMT::IFileRW *ImageFile::open_upper(ImageConfigNS::UpperConfig &upper) { IFile *idx_file = NULL; IFile *target_file = NULL; LSMT::IFileRW *ret = NULL; - - data_file = new_sure_file_by_path(upper.data().c_str(), O_RDWR, this); + data_file = open_localfile_adaptor(upper.data().c_str(), O_RDWR, 0644); if (!data_file) { LOG_ERROR("open(`,flags), `:`", upper.data(), errno, strerror(errno)); goto ERROR_EXIT; } - idx_file = new_sure_file_by_path(upper.index().c_str(), O_RDWR, this); + idx_file = open_localfile_adaptor(upper.index().c_str(), O_RDWR, 0644); if (!idx_file) { LOG_ERROR("open(`,flags), `:`", upper.index(), errno, strerror(errno)); goto ERROR_EXIT; @@ -385,13 +376,13 @@ LSMT::IFileRW *ImageFile::open_upper(ImageConfigNS::UpperConfig &upper) { if (upper.target() != "") { LOG_INFO("turboOCIv1 upper layer : `, `, `, `", upper.index(), upper.data(), upper.target()); - target_file = new_sure_file_by_path(upper.target().c_str(), O_RDWR, this); + target_file = open_localfile_adaptor(upper.target().c_str(), O_RDWR, 0644); if (!target_file) { LOG_ERROR("open(`,flags), `:`", upper.target(), errno, strerror(errno)); goto ERROR_EXIT; } if (upper.gzipIndex() != "") { - auto gzip_index = new_sure_file_by_path(upper.gzipIndex().c_str(), O_RDWR, this); + auto gzip_index = open_localfile_adaptor(upper.gzipIndex().c_str(), O_RDWR, 0644); if (!gzip_index) { LOG_ERROR("open(`,flags), `:`", upper.gzipIndex(), errno, strerror(errno)); goto ERROR_EXIT; diff --git a/src/main.cpp b/src/main.cpp index 8f239f8b..4f4f9f34 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -94,6 +94,29 @@ class TCMULoop { } }; +using SureIODelegate = Delegate; + +ssize_t sure(SureIODelegate io, const struct iovec *iov, int 
iovcnt, off_t offset) { + auto time_st = photon::now; + uint64_t try_cnt = 0, sleep_period = 20UL * 1000; +again: + if (photon::now - time_st > 7LL * 24 * 60 * 60 * 1000 * 1000 /*7days*/) { + LOG_ERROR_RETURN(EIO, -1, "sure request timeout, offset: `", offset); + } + ssize_t ret = io(iov, iovcnt, offset); + if (ret >= 0) { + return ret; + } + if (try_cnt % 10 == 0) { + LOG_ERROR("io request failed, offset: `, ret: `, retry times: `, errno:`", offset, ret, + try_cnt, errno); + } + try_cnt++; + photon::thread_usleep(sleep_period); + sleep_period = std::min(sleep_period * 2, 30UL * 1000 * 1000); + goto again; +} + void cmd_handler(struct tcmu_device *dev, struct tcmulib_cmd *cmd) { obd_dev *odev = (obd_dev *)tcmu_dev_get_private(dev); ImageFile *file = odev->file; @@ -142,7 +165,8 @@ void cmd_handler(struct tcmu_device *dev, struct tcmulib_cmd *cmd) { case READ_12: case READ_16: length = tcmu_iovec_length(cmd->iovec, cmd->iov_cnt); - ret = file->preadv(cmd->iovec, cmd->iov_cnt, tcmu_cdb_to_byte(dev, cmd->cdb)); + ret = sure({file, &ImageFile::preadv}, cmd->iovec, cmd->iov_cnt, + tcmu_cdb_to_byte(dev, cmd->cdb)); if (ret == length) { tcmulib_command_complete(dev, cmd, TCMU_STS_OK); } else { diff --git a/src/sure_file.cpp b/src/sure_file.cpp deleted file mode 100644 index 0072a87a..00000000 --- a/src/sure_file.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/* - Copyright The Overlaybd Authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ -#include -#include -#include -#include -#include -#include -#include "image_file.h" -#include "sure_file.h" - -using namespace photon::fs; - -class SureFile : public ForwardFile_Ownership { -public: - SureFile() = delete; - SureFile(IFile *src_file, ImageFile *image_file, bool ownership) - : ForwardFile_Ownership(src_file, ownership), m_ifile(image_file) { - } - -private: - ImageFile *m_ifile = nullptr; - - void io_sleep(uint64_t &try_cnt) { - if (try_cnt < 10) - photon::thread_usleep(500); // 500us - else - photon::thread_usleep(2000); // 2ms - - if (try_cnt > 30000) // >1min - photon::thread_sleep(1); // 1sec - } - - void io_hand() { - while (m_ifile && m_ifile->m_status >= 0) { - LOG_ERROR("write(...) incorrect, io hang here!"); - photon::thread_sleep(300); - } - } - -public: - virtual ssize_t write(const void *buf, size_t count) override { - size_t done_cnt = 0; - while (m_ifile && m_ifile->m_status >= 0 && done_cnt < count) { - ssize_t ret = m_file->write((char *)buf + done_cnt, count - done_cnt); - if (ret > 0) - done_cnt += ret; - if (done_cnt == count) - return count; - if (done_cnt > count) { - LOG_ERROR("write(...), done_cnt(`)>count(`), ret:`, errno:`, need io hang", - done_cnt, count, ret, errno); - io_hand(); - } - - if (ret == -1 && errno == EINTR) { - LOG_INFO("write(...), errno:EINTR, need continue try."); - continue; - } else { - LOG_ERROR("write(...), done_cnt(`)>count(`), ret:`, errno:`, need io hang", - done_cnt, count, ret, errno); - io_hand(); - } - } - return done_cnt; - } - - virtual ssize_t pread(void *buf, size_t count, off_t offset) override { - uint64_t try_cnt = 0; - size_t got_cnt = 0; - auto time_st = photon::now; - while (m_ifile && m_ifile->m_status >= 0 && photon::now - time_st < 1000UL * INT32_MAX) { - // exit on image in exit status, or timeout - ssize_t ret = m_file->pread((char *)buf + got_cnt, count - got_cnt, offset + got_cnt); - if (ret > 0) - got_cnt += ret; - if (got_cnt == count) - return count; - - if ((ret < 0) && 
(m_ifile->m_status < 1) && (errno == EPERM)) { - // exit when booting. after boot, hang. - m_ifile->set_auth_failed(); - LOG_ERROR_RETURN(0, -1, "authentication failed during image boot."); - } - - if (got_cnt > count) { - LOG_ERROR("pread(,`,`) return `. got_cnt:` > count:`, restart pread.", count, - offset, ret, got_cnt, count); - got_cnt = 0; - } - - io_sleep(try_cnt); - try_cnt++; - - if (try_cnt % 300 == 0) { - LOG_ERROR("pread read partial data. count:`, offset:`, ret:`, got_cnt:`, errno:`", - count, offset, ret, got_cnt, errno); - } - } - return -1; - } -}; - - -IFile *new_sure_file(IFile *src_file, ImageFile *image_file, - bool ownership) { - if (!src_file) { - LOG_ERROR("failed to new_sure_file(null)"); - return nullptr; - } - return new SureFile(src_file, image_file, ownership); -} - -IFile *new_sure_file_by_path(const char *file_path, int open_flags, - ImageFile *image_file, bool ownership) { - auto file = open_localfile_adaptor(file_path, open_flags, 0644, 0); - return new_sure_file(file, image_file, ownership); -} \ No newline at end of file diff --git a/src/sure_file.h b/src/sure_file.h deleted file mode 100644 index 1aaccd74..00000000 --- a/src/sure_file.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - Copyright The Overlaybd Authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ -#pragma once -#include - -class ImageFile; - -IFile *new_sure_file(IFile *src_file, ImageFile *image_file, - bool ownership = true); -IFile *new_sure_file_by_path(const char *file_path, int open_flags, - ImageFile *image_file, bool ownership = true); \ No newline at end of file From 0d835b18487132c3b8e40d0c9811b099fded79f7 Mon Sep 17 00:00:00 2001 From: "yuchen.cc" Date: Fri, 25 Aug 2023 19:23:09 +0800 Subject: [PATCH 03/31] refactor some logs Signed-off-by: yuchen.cc --- src/bk_download.cpp | 3 +-- src/image_file.cpp | 12 +++++------- src/overlaybd/extfs/extfs.cpp | 6 ++---- src/overlaybd/tar/libtar.cpp | 32 ++++++++++---------------------- src/overlaybd/tar/tar_file.cpp | 3 +++ src/overlaybd/tar/whiteout.cpp | 6 ++---- src/switch_file.cpp | 7 +++---- 7 files changed, 26 insertions(+), 43 deletions(-) diff --git a/src/bk_download.cpp b/src/bk_download.cpp index 74dd0339..f0690ed1 100644 --- a/src/bk_download.cpp +++ b/src/bk_download.cpp @@ -128,8 +128,7 @@ bool BkDownload::download_done() { int ret = lfs->rename(old_name.c_str(), new_name.c_str()); if (ret != 0) { - LOG_ERROR("rename(`,`), `:`", old_name, new_name, errno, strerror(errno)); - return false; + LOG_ERRNO_RETURN(0, false, "rename(`,`) failed", old_name, new_name); } LOG_INFO("download verify done. 
rename(`,`) success", old_name, new_name); return true; diff --git a/src/image_file.cpp b/src/image_file.cpp index 7a1b5f25..c31df0a4 100644 --- a/src/image_file.cpp +++ b/src/image_file.cpp @@ -58,7 +58,7 @@ IFile *ImageFile::__open_ro_file(const std::string &path) { auto file = open_localfile_adaptor(path.c_str(), flags, 0644, ioengine); if (!file) { set_failed("failed to open local file " + path); - LOG_ERROR_RETURN(0, nullptr, "open(`),`:`", path, errno, strerror(errno)); + LOG_ERRNO_RETURN(0, nullptr, "open(`) failed", path); } if (flags & O_DIRECT) { @@ -67,8 +67,7 @@ IFile *ImageFile::__open_ro_file(const std::string &path) { if (!aligned_file) { set_failed("failed to open aligned_file_adaptor " + path); delete file; - LOG_ERROR_RETURN(0, nullptr, "new_aligned_file_adaptor(`) failed, `:`", path, errno, - strerror(errno)); + LOG_ERRNO_RETURN(0, nullptr, "new_aligned_file_adaptor(`) failed", path); } file = aligned_file; } @@ -77,8 +76,7 @@ IFile *ImageFile::__open_ro_file(const std::string &path) { if (!switch_file) { set_failed("failed to open switch file `" + path); delete file; - LOG_ERROR_RETURN(0, nullptr, "new_switch_file(`) failed, `,:`", path, errno, - strerror(errno)); + LOG_ERRNO_RETURN(0, nullptr, "new_switch_file(`) failed", path); } file = switch_file; @@ -89,7 +87,7 @@ IFile *ImageFile::__open_ro_target_file(const std::string &path) { auto file = open_localfile_adaptor(path.c_str(), O_RDONLY, 0644, 0); if (!file) { set_failed("failed to open local data file " + path); - LOG_ERROR_RETURN(0, nullptr, "open(`),`:`", path, errno, strerror(errno)); + LOG_ERRNO_RETURN(0, nullptr, "open(`) failed", path); } return file; } @@ -281,7 +279,7 @@ int ImageFile::open_lower_layer(IFile *&file, ImageConfigNS::LayerConfig &layer, auto gz_index = open_localfile_adaptor(layer.gzipIndex().c_str(), O_RDONLY, 0644, 0); if (!gz_index) { set_failed("failed to open gzip index " + layer.gzipIndex()); - LOG_ERROR_RETURN(0, -1, "open(`),`:`", layer.gzipIndex(), errno, 
strerror(errno)); + LOG_ERRNO_RETURN(0, -1, "open(`) failed", layer.gzipIndex()); } target_file = new_gzfile(target_file, gz_index, true); if (image_service.global_conf.gzipCacheConfig().enable() && layer.targetDigest() != "") { diff --git a/src/overlaybd/extfs/extfs.cpp b/src/overlaybd/extfs/extfs.cpp index ce7ac8ce..a197fcb9 100644 --- a/src/overlaybd/extfs/extfs.cpp +++ b/src/overlaybd/extfs/extfs.cpp @@ -57,15 +57,13 @@ ext2_filsys do_ext2fs_open(io_manager extfs_manager) { ); if (ret) { errno = -parse_extfs_error(nullptr, 0, ret); - LOG_ERROR("failed ext2fs_open, errno `:`", errno, strerror(errno)); - return nullptr; + LOG_ERRNO_RETURN(0, nullptr, "failed ext2fs_open"); } ret = ext2fs_read_bitmaps(fs); if (ret) { errno = -parse_extfs_error(fs, 0, ret); - LOG_ERROR("failed ext2fs_read_bitmaps, errno `:`", errno, strerror(errno)); ext2fs_close(fs); - return nullptr; + LOG_ERRNO_RETURN(0, nullptr, "failed ext2fs_read_bitmaps"); } LOG_INFO("ext2fs opened"); return fs; diff --git a/src/overlaybd/tar/libtar.cpp b/src/overlaybd/tar/libtar.cpp index 7781ea7c..866391bb 100644 --- a/src/overlaybd/tar/libtar.cpp +++ b/src/overlaybd/tar/libtar.cpp @@ -44,15 +44,13 @@ int UnTar::set_file_perms(const char *filename) { /* change owner/group */ if (geteuid() == 0) { if (fs->lchown(filename, uid, gid) == -1) { - LOG_ERROR("lchown failed, filename `, `", filename, strerror(errno)); - return -1; + LOG_ERRNO_RETURN(0, -1, "lchown failed, filename `, uid `, gid `", filename, uid, gid); } } /* change access/modification time */ if (fs->lutimes(filename, tv) == -1) { - LOG_ERROR("lutimes failed, filename `, `", filename, strerror(errno)); - return -1; + LOG_ERRNO_RETURN(0, -1, "lutimes failed, filename `", filename); } /* change permissions */ @@ -69,8 +67,7 @@ int UnTar::set_file_perms(const char *filename) { return 0; } if (fs->chmod(filename, mode) == -1) { - LOG_ERROR("chmod failed `", strerror(errno)); - return -1; + LOG_ERRNO_RETURN(0, -1, "chmod failed, filename `, mode `", 
filename, mode); } return 0; @@ -95,8 +92,7 @@ int UnTar::extract_all() { while ((i = read_header()) == 0) { if (extract_file() != 0) { - LOG_ERROR("extract failed, filename `, `", get_pathname(), strerror(errno)); - return -1; + LOG_ERRNO_RETURN(0, -1, "extract failed, filename `", get_pathname()); } if (TH_ISDIR(header)) { dirs.emplace_back(std::make_pair(std::string(get_pathname()), header.get_mtime())); @@ -111,8 +107,7 @@ int UnTar::extract_all() { tv[0].tv_sec = tv[1].tv_sec = dir.second; tv[0].tv_usec = tv[1].tv_usec = 0; if (fs->lutimes(path.c_str(), tv) == -1) { - LOG_ERROR("utime failed, filename `, `", dir.first.c_str(), strerror(errno)); - return -1; + LOG_ERRNO_RETURN(0, -1, "utime failed, filename `", dir.first.c_str()); } } @@ -151,15 +146,11 @@ int UnTar::extract_file() { } else { if (!S_ISDIR(s.st_mode)) { if (fs->unlink(npath.c_str()) == -1 && errno != ENOENT) { - LOG_ERROR("remove exist file ` failed, `", npath.c_str(), strerror(errno)); - errno = EEXIST; - return -1; + LOG_ERRNO_RETURN(EEXIST, -1, "remove exist file ` failed", npath.c_str()); } } else if (!TH_ISDIR(header)) { if (remove_all(npath) == -1) { - LOG_ERROR("remove exist dir ` failed, `", npath.c_str(), strerror(errno)); - errno = EEXIST; - return -1; + LOG_ERRNO_RETURN(EEXIST, -1, "remove exist dir ` failed", npath.c_str()); } } } @@ -290,8 +281,7 @@ int UnTar::extract_hardlink(const char *filename) { char *linktgt = get_linkname(); LOG_DEBUG(" ==> extracting: ` (link to `)", filename, linktgt); if (fs->link(linktgt, filename) == -1) { - LOG_ERROR("link failed, `", strerror(errno)); - return -1; + LOG_ERRNO_RETURN(0, -1, "link failed, filename `, linktgt `", filename, linktgt); } return 0; } @@ -300,8 +290,7 @@ int UnTar::extract_symlink(const char *filename) { char *linktgt = get_linkname(); LOG_DEBUG(" ==> extracting: ` (symlink to `)", filename, linktgt); if (fs->symlink(linktgt, filename) == -1) { - LOG_ERROR("symlink failed, `", strerror(errno)); - return -1; + 
LOG_ERRNO_RETURN(0, -1, "symlink failed, filename `, linktgt `", filename, linktgt); } return 0; } @@ -327,8 +316,7 @@ int UnTar::extract_block_char_fifo(const char *filename) { LOG_DEBUG(" ==> extracting: ` (block/char/fifo `,`)", filename, devmaj, devmin); if (fs->mknod(filename, mode, makedev(devmaj, devmin)) == -1) { - LOG_ERROR("block/char/fifo failed, `", strerror(errno)); - return -1; + LOG_ERRNO_RETURN(0, -1, "block/char/fifo failed, filename `", filename); } return 0; diff --git a/src/overlaybd/tar/tar_file.cpp b/src/overlaybd/tar/tar_file.cpp index ee520d2d..4923705c 100644 --- a/src/overlaybd/tar/tar_file.cpp +++ b/src/overlaybd/tar/tar_file.cpp @@ -332,6 +332,9 @@ IFile *new_tar_file(IFile *file, bool create) { } IFile *open_tar_file(IFile *file) { + if (!file) { + LOG_ERROR_RETURN(0, nullptr, "file is nullptr"); + } auto ret = is_tar_file(file); if (ret == 1) { LOG_INFO("open file as tar file"); diff --git a/src/overlaybd/tar/whiteout.cpp b/src/overlaybd/tar/whiteout.cpp index ceee6b1d..fa5c0d4f 100644 --- a/src/overlaybd/tar/whiteout.cpp +++ b/src/overlaybd/tar/whiteout.cpp @@ -59,14 +59,12 @@ int UnTar::remove_all(const std::string &path, bool rmdir) { return 0; } } else { - LOG_ERROR("get path ` stat failed, errno `:`", path, errno, strerror(errno)); - return -1; + LOG_ERRNO_RETURN(0, -1, "get path ` stat failed", path); } auto dirs = fs->opendir(path.c_str()); if (dirs == nullptr) { - LOG_ERROR("open dir ` failed, errno `:`", path, errno, strerror(errno)); - return -1; + LOG_ERRNO_RETURN(0, -1, "open dir ` failed", path); } dirent *dirInfo; while ((dirInfo = dirs->get()) != nullptr) { diff --git a/src/switch_file.cpp b/src/switch_file.cpp index 57815c9d..66bd1442 100644 --- a/src/switch_file.cpp +++ b/src/switch_file.cpp @@ -46,13 +46,12 @@ static IFile *try_open_zfile(IFile *file, bool verify, const char *file_path) { if (is_zfile == 1) { auto zf = ZFile::zfile_open_ro(file, verify, true); if (!zf) { - LOG_ERROR_RETURN(0, nullptr, "zfile_open_ro 
failed, path: `: error: `(`)", file_path, - errno, strerror(errno)); + LOG_ERRNO_RETURN(0, nullptr, "zfile_open_ro failed, path: `", file_path); } - LOG_INFO("open file as zfile"); + LOG_INFO("open file as zfile format, path: `", file_path); return zf; } - LOG_INFO("file is not zfile format"); + LOG_INFO("file is not zfile format, path: `", file_path); return file; } From ad2a737018734ba5599a2e36b5feb3eb9d67d667 Mon Sep 17 00:00:00 2001 From: "yuchen.cc" Date: Mon, 28 Aug 2023 19:15:59 +0800 Subject: [PATCH 04/31] fix bug in retry open zfile Signed-off-by: yuchen.cc --- src/image_file.cpp | 24 ++++++++++++++++++++---- src/overlaybd/zfile/zfile.cpp | 7 ++++++- src/switch_file.cpp | 18 ++++++++++++------ 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/src/image_file.cpp b/src/image_file.cpp index c31df0a4..c2e2baec 100644 --- a/src/image_file.cpp +++ b/src/image_file.cpp @@ -34,6 +34,7 @@ #include "switch_file.h" #include "overlaybd/gzip/gz.h" #include "overlaybd/gzindex/gzfile.h" +#include "overlaybd/tar/tar_file.h" #define PARALLEL_LOAD_INDEX 32 using namespace photon::fs; @@ -71,10 +72,18 @@ IFile *ImageFile::__open_ro_file(const std::string &path) { } file = aligned_file; } + + auto tar_file = new_tar_file_adaptor(file); + if (!tar_file) { + set_failed("failed to open file as tar file " + path); + delete file; + LOG_ERROR_RETURN(0, nullptr, "new_tar_file_adaptor(`) failed", path); + } + file = tar_file; // set to local, no need to switch, for zfile and audit ISwitchFile *switch_file = new_switch_file(file, true, path.c_str()); if (!switch_file) { - set_failed("failed to open switch file `" + path); + set_failed("failed to open switch file " + path); delete file; LOG_ERRNO_RETURN(0, nullptr, "new_switch_file(`) failed", path); } @@ -158,10 +167,17 @@ IFile *ImageFile::__open_ro_remote(const std::string &dir, const std::string &di remote_file->ioctl(SET_SIZE, size); remote_file->ioctl(SET_LOCAL_DIR, dir); - ISwitchFile *switch_file = 
new_switch_file(remote_file); - if (!switch_file) { - set_failed("failed to open switch file `" + url); + IFile *tar_file = new_tar_file_adaptor(remote_file); + if (!tar_file) { + set_failed("failed to open remote file as tar file " + url); delete remote_file; + LOG_ERROR_RETURN(0, nullptr, "failed to open remote file as tar file `", url); + } + + ISwitchFile *switch_file = new_switch_file(tar_file, false, url.c_str()); + if (!switch_file) { + set_failed("failed to open switch file " + url); + delete tar_file; LOG_ERROR_RETURN(0, nullptr, "failed to open switch file `", url); } diff --git a/src/overlaybd/zfile/zfile.cpp b/src/overlaybd/zfile/zfile.cpp index d75e7036..f8577442 100644 --- a/src/overlaybd/zfile/zfile.cpp +++ b/src/overlaybd/zfile/zfile.cpp @@ -338,13 +338,18 @@ class CompressionFile : public VirtualReadOnlyFile { int get_current_block() { m_reader->m_buf_offset = m_reader->get_buf_offset(m_reader->m_idx); - if ((size_t)(m_reader->m_buf_offset) > sizeof(m_buf)) { + if ((size_t)(m_reader->m_buf_offset) >= sizeof(m_buf)) { m_reader->m_eno = ERANGE; LOG_ERRNO_RETURN(0, -1, "get inner buffer offset failed."); } auto blk_idx = m_reader->m_idx; compressed_size = m_reader->compressed_size(); + if ((size_t)(m_reader->m_buf_offset) + compressed_size > sizeof(m_buf)) { + m_reader->m_eno = ERANGE; + LOG_ERRNO_RETURN(0, -1, "inner buffer offset (`) + compressed size (`) overflow.", + m_reader->m_buf_offset, compressed_size); + } if (blk_idx == m_reader->m_begin_idx) { cp_begin = m_reader->get_inblock_offset(m_reader->m_offset); diff --git a/src/switch_file.cpp b/src/switch_file.cpp index 66bd1442..f3a47eb8 100644 --- a/src/switch_file.cpp +++ b/src/switch_file.cpp @@ -39,7 +39,6 @@ using namespace photon::fs; static IFile *try_open_zfile(IFile *file, bool verify, const char *file_path) { auto is_zfile = ZFile::is_zfile(file); if (is_zfile == -1) { - delete file; LOG_ERRNO_RETURN(0, nullptr, "check file type failed."); } // open zfile @@ -82,9 +81,16 @@ class 
SwitchFile : public ISwitchFile { LOG_ERROR("failed to open commit file, path: `", m_filepath); return; } - - file = try_open_zfile(new_tar_file_adaptor(file), false, m_filepath.c_str()); - if (file == nullptr) { + auto tarfile = new_tar_file_adaptor(file); + if (tarfile == nullptr) { + delete file; + LOG_ERROR("failed to open commit file as tar file, path: `", m_filepath); + return; + } + file = tarfile; + auto zfile = try_open_zfile(file, false, m_filepath.c_str()); + if (zfile == nullptr) { + delete file; LOG_ERROR("failed to open commit file as zfile, path: `", m_filepath); return; } @@ -157,11 +163,11 @@ class SwitchFile : public ISwitchFile { }; ISwitchFile *new_switch_file(IFile *source, bool local, const char *file_path) { - // if tar file, open tar file int retry = 1; again: - auto file = try_open_zfile(new_tar_file_adaptor(source), !local, file_path); + auto file = try_open_zfile(source, !local, file_path); if (file == nullptr) { + LOG_ERROR("failed to open source file as zfile, path: `, retry: `", file_path, retry); if (retry--) // may retry after cache evict goto again; return nullptr; From 8c7b2e7d5a50e96087f1f183315499725db5e0b7 Mon Sep 17 00:00:00 2001 From: "yuchen.cc" Date: Wed, 30 Aug 2023 18:19:53 +0800 Subject: [PATCH 05/31] Fix bugs in zfile evict and build jump table. 
Signed-off-by: yuchen.cc --- src/overlaybd/tar/tar_file.cpp | 6 ++++++ src/overlaybd/zfile/zfile.cpp | 19 +++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/overlaybd/tar/tar_file.cpp b/src/overlaybd/tar/tar_file.cpp index 4923705c..57552ead 100644 --- a/src/overlaybd/tar/tar_file.cpp +++ b/src/overlaybd/tar/tar_file.cpp @@ -158,6 +158,12 @@ class TarFile : public ForwardFile_Ownership { virtual ssize_t pwritev(const struct iovec *iov, int iovcnt, off_t offset) override { return m_file->pwritev(iov, iovcnt, offset + base_offset); } + virtual int fallocate(int mode, off_t offset, off_t len) override { + return m_file->fallocate(mode, offset + base_offset, len); + } + virtual int fadvise(off_t offset, off_t len, int advice) override { + return m_file->fadvise(offset + base_offset, len, advice); + } virtual int close() override { if (is_new_tar()) { diff --git a/src/overlaybd/zfile/zfile.cpp b/src/overlaybd/zfile/zfile.cpp index f8577442..381d6e43 100644 --- a/src/overlaybd/zfile/zfile.cpp +++ b/src/overlaybd/zfile/zfile.cpp @@ -182,6 +182,8 @@ class CompressionFile : public VirtualReadOnlyFile { } int build(const uint32_t *ibuf, size_t n, off_t offset_begin, uint32_t block_size) { + partial_offset.clear(); + deltas.clear(); group_size = (uinttype_max + 1) / block_size; partial_offset.reserve(n / group_size + 1); deltas.reserve(n + 1); @@ -196,8 +198,8 @@ class CompressionFile : public VirtualReadOnlyFile { continue; } if ((uint64_t)deltas[i - 1] + ibuf[i - 1] >= (uint64_t)uinttype_max) { - LOG_ERRNO_RETURN(ERANGE, -1, "build block[`] length failed `+` > ` (exceed)", - deltas[i-1], ibuf[i-1], (uint64_t)uinttype_max); + LOG_ERROR_RETURN(ERANGE, -1, "build block[`] length failed `+` > ` (exceed)", + i-1, deltas[i-1], ibuf[i-1], (uint64_t)uinttype_max); } deltas.push_back(deltas[i - 1] + ibuf[i - 1]); } @@ -255,14 +257,16 @@ class CompressionFile : public VirtualReadOnlyFile { int reload(size_t idx) { auto read_size = 
get_blocks_length(idx, idx + 1); auto begin_offset = m_zfile->m_jump_table[idx]; + LOG_WARN("trim and reload. (idx: `, offset: `, len: `)", idx, begin_offset, read_size); int trim_res = m_zfile->m_file->trim(begin_offset, read_size); if (trim_res < 0) { - LOG_ERROR_RETURN(0, -1, "failed to trim block idx: `", idx); + LOG_ERRNO_RETURN(0, -1, "trim block failed. (idx: `, offset: `, len: `)", + idx, begin_offset, read_size); } auto readn = m_zfile->m_file->pread(m_buf + m_buf_offset, read_size, begin_offset); if (readn != (ssize_t)read_size) { - LOG_ERRNO_RETURN(0, -1, "read compressed blocks failed. (offset: `, len: `)", - begin_offset, read_size); + LOG_ERRNO_RETURN(0, -1, "read compressed blocks failed. (idx: `, offset: `, len: `)", + idx, begin_offset, read_size); } return 0; } @@ -716,9 +720,12 @@ bool load_jump_table(IFile *file, CompressionFile::HeaderTrailer *pheader_traile if (ret < (ssize_t)index_bytes) { LOG_ERRNO_RETURN(0, false, "failed to read index"); } - jump_table.build(ibuf.get(), pht->index_size, + ret = jump_table.build(ibuf.get(), pht->index_size, CompressionFile::HeaderTrailer::SPACE + pht->opt.dict_size, pht->opt.block_size); + if (ret != 0) { + LOG_ERRNO_RETURN(0, false, "failed to build jump table"); + } if (pheader_trailer) *pheader_trailer = *pht; return true; From ef7ef4e2b4f3c1d514e8ab4a80ced17625e2350d Mon Sep 17 00:00:00 2001 From: "zhuangbowei.zbw" Date: Mon, 4 Sep 2023 14:07:35 +0800 Subject: [PATCH 06/31] [bugfix] duplicate bearer Signed-off-by: zhuangbowei.zbw --- src/overlaybd/registryfs/registryfs_v2.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/overlaybd/registryfs/registryfs_v2.cpp b/src/overlaybd/registryfs/registryfs_v2.cpp index 2510d369..5b1dd590 100644 --- a/src/overlaybd/registryfs/registryfs_v2.cpp +++ b/src/overlaybd/registryfs/registryfs_v2.cpp @@ -138,8 +138,7 @@ class RegistryFSImpl_v2 : public RegistryFS { op.req.reset(Verb::GET, *actual_url); // set token if needed if (actual_info->mode 
== UrlMode::Self && !actual_info->info.empty()) { - op.req.headers.insert(kAuthHeaderKey, "Bearer "); - op.req.headers.value_append(actual_info->info); + op.req.headers.insert(kAuthHeaderKey, actual_info->info); } op.req.headers.range(offset, offset + count - 1); op.set_enable_proxy(m_client->has_proxy()); From c69b0d318837eb596cb0d8272b1d3e25c5b3c4a4 Mon Sep 17 00:00:00 2001 From: Yifan Yuan Date: Mon, 11 Sep 2023 11:54:09 +0800 Subject: [PATCH 07/31] [bugfix] enhance bound check on building jumptable Signed-off-by: Yifan Yuan --- src/overlaybd/zfile/zfile.cpp | 38 ++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/overlaybd/zfile/zfile.cpp b/src/overlaybd/zfile/zfile.cpp index 381d6e43..562e2c56 100644 --- a/src/overlaybd/zfile/zfile.cpp +++ b/src/overlaybd/zfile/zfile.cpp @@ -181,7 +181,8 @@ class CompressionFile : public VirtualReadOnlyFile { return deltas.size(); } - int build(const uint32_t *ibuf, size_t n, off_t offset_begin, uint32_t block_size) { + int build(const uint32_t *ibuf, size_t n, off_t offset_begin, uint32_t block_size, + bool enable_crc) { partial_offset.clear(); deltas.clear(); group_size = (uinttype_max + 1) / block_size; @@ -190,7 +191,11 @@ class CompressionFile : public VirtualReadOnlyFile { auto raw_offset = offset_begin; partial_offset.push_back(raw_offset); deltas.push_back(0); + size_t min_blksize = (enable_crc ? 
sizeof(uint32_t) : 0); for (ssize_t i = 1; i < (ssize_t)n + 1; i++) { + if (ibuf[i - 1] <= min_blksize) { + LOG_ERRNO_RETURN(EIO, -1, "unexpected block size(id: `):", i - 1, ibuf[i - 1]); + } raw_offset += ibuf[i - 1]; if ((i % group_size) == 0) { partial_offset.push_back(raw_offset); @@ -199,7 +204,7 @@ class CompressionFile : public VirtualReadOnlyFile { } if ((uint64_t)deltas[i - 1] + ibuf[i - 1] >= (uint64_t)uinttype_max) { LOG_ERROR_RETURN(ERANGE, -1, "build block[`] length failed `+` > ` (exceed)", - i-1, deltas[i-1], ibuf[i-1], (uint64_t)uinttype_max); + i - 1, deltas[i - 1], ibuf[i - 1], (uint64_t)uinttype_max); } deltas.push_back(deltas[i - 1] + ibuf[i - 1]); } @@ -260,13 +265,14 @@ class CompressionFile : public VirtualReadOnlyFile { LOG_WARN("trim and reload. (idx: `, offset: `, len: `)", idx, begin_offset, read_size); int trim_res = m_zfile->m_file->trim(begin_offset, read_size); if (trim_res < 0) { - LOG_ERRNO_RETURN(0, -1, "trim block failed. (idx: `, offset: `, len: `)", - idx, begin_offset, read_size); + LOG_ERRNO_RETURN(0, -1, "trim block failed. (idx: `, offset: `, len: `)", idx, + begin_offset, read_size); } auto readn = m_zfile->m_file->pread(m_buf + m_buf_offset, read_size, begin_offset); if (readn != (ssize_t)read_size) { - LOG_ERRNO_RETURN(0, -1, "read compressed blocks failed. (idx: `, offset: `, len: `)", - idx, begin_offset, read_size); + LOG_ERRNO_RETURN(0, -1, + "read compressed blocks failed. 
(idx: `, offset: `, len: `)", idx, + begin_offset, read_size); } return 0; } @@ -351,8 +357,9 @@ class CompressionFile : public VirtualReadOnlyFile { compressed_size = m_reader->compressed_size(); if ((size_t)(m_reader->m_buf_offset) + compressed_size > sizeof(m_buf)) { m_reader->m_eno = ERANGE; - LOG_ERRNO_RETURN(0, -1, "inner buffer offset (`) + compressed size (`) overflow.", - m_reader->m_buf_offset, compressed_size); + LOG_ERRNO_RETURN(0, -1, + "inner buffer offset (`) + compressed size (`) overflow.", + m_reader->m_buf_offset, compressed_size); } if (blk_idx == m_reader->m_begin_idx) { @@ -439,15 +446,15 @@ class CompressionFile : public VirtualReadOnlyFile { if (count <= 0) return 0; if (offset + count > m_ht.original_file_size) { - LOG_ERRNO_RETURN(ERANGE, -1, "pread range exceed (` > `)", - offset + count, m_ht.original_file_size); + LOG_ERRNO_RETURN(ERANGE, -1, "pread range exceed (` > `)", offset + count, + m_ht.original_file_size); } ssize_t readn = 0; // final will equal to count unsigned char raw[MAX_READ_SIZE]; BlockReader br(this, offset, count); for (auto &block : br) { if (buf == nullptr) { - //used for prefetch; no copy, no decompress; + // used for prefetch; no copy, no decompress; readn += block.cp_len; continue; } @@ -506,7 +513,7 @@ static int write_header_trailer(IFile *file, bool is_header, bool is_sealed, boo CompressionFile::HeaderTrailer *pht, off_t offset = -1); ssize_t compress_data(ICompressor *compressor, const unsigned char *buf, size_t count, - unsigned char *dest_buf, size_t dest_len, bool gen_crc) { + unsigned char *dest_buf, size_t dest_len, bool gen_crc) { ssize_t compressed_len = 0; auto ret = compressor->compress((const unsigned char *)buf, count, dest_buf, dest_len); @@ -721,8 +728,8 @@ bool load_jump_table(IFile *file, CompressionFile::HeaderTrailer *pheader_traile LOG_ERRNO_RETURN(0, false, "failed to read index"); } ret = jump_table.build(ibuf.get(), pht->index_size, - CompressionFile::HeaderTrailer::SPACE + 
pht->opt.dict_size, - pht->opt.block_size); + CompressionFile::HeaderTrailer::SPACE + pht->opt.dict_size, + pht->opt.block_size, pht->opt.verify); if (ret != 0) { LOG_ERRNO_RETURN(0, false, "failed to build jump table"); } @@ -745,8 +752,7 @@ IFile *zfile_open_ro(IFile *file, bool verify, bool ownership) { auto res = file->fallocate(0, 0, -1); LOG_ERROR("failed to load jump table, fallocate result: `", res); if (res < 0) { - LOG_ERRNO_RETURN(0, nullptr, - "failed to load jump table and failed to evict"); + LOG_ERRNO_RETURN(0, nullptr, "failed to load jump table and failed to evict"); } if (retry--) { LOG_INFO("retry loading jump table"); From 7315c312ca8d0c5a854b619174c1fa2910b5b610 Mon Sep 17 00:00:00 2001 From: Yifan Yuan Date: Sat, 9 Sep 2023 21:11:25 +0800 Subject: [PATCH 08/31] save digest of zfile's header/trailer and index Signed-off-by: Yifan Yuan --- src/overlaybd/zfile/format_spec.md | 15 +++-- src/overlaybd/zfile/test/test.cpp | 32 +++++++++++ src/overlaybd/zfile/zfile.cpp | 91 ++++++++++++++++++++++-------- 3 files changed, 110 insertions(+), 28 deletions(-) diff --git a/src/overlaybd/zfile/format_spec.md b/src/overlaybd/zfile/format_spec.md index 07cba407..aa6aa263 100644 --- a/src/overlaybd/zfile/format_spec.md +++ b/src/overlaybd/zfile/format_spec.md @@ -21,14 +21,15 @@ The format of header is described as below. All fields are little-endian. 
| :---: | :----: | :----: | :--- | | magic0 | 0 | 8 | "ZFile\0\1" (and an implicit '\0') | | magic1 | 8 | 16 | 74 75 6A 69, 2E 79 79 66, 40 41 6C 69, 62 61 62 61 | -| size | 24 | uint32_t | size of the header struct (108), excluding the tail padding | -| reserved| 28 | 4 | reserved space, should be 0 | +| size | 24 | uint32_t | size of the header structure, excluding the tail padding | +| digest | 28 | uint32_t | checksum for the range 28-511 bytes in header | | flags | 32 | uint64_t | bits for flags* (see later for details) | | index_offset | 40 | uint64_t | index offset | -| index_size | 48 | uint64_t | size of the index section, possibly compressed| +| index_size | 48 | uint64_t | size of the index section, possibly compressed based on flags | | original_file_size | 56 | uint64_t | size of the orignal file before compression | -| reserved| 64 | 8 | reserved space, should be 0 | -| block_size | 72 | uint32_t | size of each compression block | +| index_crc | 64 | uint32_t | checksum value of index | +| reserved| 68 | 4 | reserved space, should be 0 | +| block_size| 72 | uint32_t | size of each compression block | | algo | 76 | uint8_t | compression algorithm | | level | 77 | uint8_t | compression level | | use_dict| 78 | bool | whether use dictionary | @@ -45,7 +46,9 @@ The format of header is described as below. All fields are little-endian. 
| type | 1 | this is a data file (1) or index file (0) | | sealed | 2 | this file is sealed (1) or not (0) | | info_valid | 3 | information validity of the fields *after* flags (they were initially invalid (0) after creation; and readers must resort to trailer when they meet such headers) | -| reserved | 4~63 | reserved for future use; must be 0s | +| digest | 4 | the digest of this header/trailer has been recorded in the digest field | +| index_compression | 5 | whether the index has been compressed(1) or not(0) | +| reserved | 6~63 | reserved for future use; must be 0s | ## index diff --git a/src/overlaybd/zfile/test/test.cpp b/src/overlaybd/zfile/test/test.cpp index b497da2d..b685e415 100644 --- a/src/overlaybd/zfile/test/test.cpp +++ b/src/overlaybd/zfile/test/test.cpp @@ -227,6 +227,38 @@ TEST_F(ZFileTest, validation_check) { EXPECT_NE(zfile_validation_check(fdst.get()), 0); } +TEST_F(ZFileTest, ht_check) { + // log_output_level = 1; + auto fn_src = "verify.data"; + auto fn_zfile = "verify.zfile"; + auto src = lfs->open(fn_src, O_CREAT | O_TRUNC | O_RDWR /*| O_DIRECT */, 0644); + unique_ptr fsrc(src); + if (!fsrc) { + LOG_ERROR("err: `(`)", errno, strerror(errno)); + } + randwrite(fsrc.get(), 1024); + struct stat _st; + if (fsrc->fstat(&_st) != 0) { + LOG_ERROR("err: `(`)", errno, strerror(errno)); + return; + } + auto dst = lfs->open(fn_zfile, O_CREAT | O_TRUNC | O_RDWR /*| O_DIRECT */, 0644); + unique_ptr fdst(dst); + if (!fdst) { + LOG_ERROR("err: `(`)", errno, strerror(errno)); + } + CompressOptions opt; + opt.algo = CompressOptions::LZ4; + opt.verify = 1; + CompressArgs args(opt); + int ret = zfile_compress(fsrc.get(), fdst.get(), &args); + EXPECT_EQ(ret, 0); + auto x=2324; + dst->pwrite(&x, sizeof(x), 400); + EXPECT_NE(zfile_validation_check(fdst.get()), 0); + EXPECT_EQ(is_zfile(dst), -1); +} + TEST_F(ZFileTest, dsa) { const int buf_size = 1024; const int crc_count = 3000; diff --git a/src/overlaybd/zfile/zfile.cpp b/src/overlaybd/zfile/zfile.cpp index 
381d6e43..7b233bf9 100644 --- a/src/overlaybd/zfile/zfile.cpp +++ b/src/overlaybd/zfile/zfile.cpp @@ -86,13 +86,16 @@ class CompressionFile : public VirtualReadOnlyFile { // offset 24, 28, 32 uint32_t size = sizeof(HeaderTrailer); - uint32_t __padding = 0; + // uint32_t __padding = 0; + uint32_t digest = 0; uint64_t flags; static const uint32_t FLAG_SHIFT_HEADER = 0; // 1:header 0:trailer static const uint32_t FLAG_SHIFT_TYPE = 1; // 1:data file, 0:index file static const uint32_t FLAG_SHIFT_SEALED = 2; // 1:YES, 0:NO - static const uint32_t FLAG_SHIFT_HEADER_OVERWRITE = 3; + static const uint32_t FLAG_SHIFT_HEADER_OVERWRITE = 3; // overwrite trailer info to header + static const uint32_t FLAG_SHIFT_CALC_DIGEST = 4; // caculate digest for zfile header/trailer and jumptable + static const uint32_t FLAG_SHIFT_IDX_COMP = 5; // compress zfile index(jumptable) uint32_t get_flag_bit(uint32_t shift) const { return flags & (1 << shift); @@ -121,6 +124,17 @@ class CompressionFile : public VirtualReadOnlyFile { bool is_sealed() const { return get_flag_bit(FLAG_SHIFT_SEALED); } + bool is_digest_enabled() { + return get_flag_bit(FLAG_SHIFT_CALC_DIGEST); + } + bool is_valid() { + if (!is_digest_enabled()) return true; + auto saved_crc = this->digest; + this->digest = 0; + DEFER(this->digest = saved_crc;); + auto crc = crc32::crc32c(this, CompressionFile::HeaderTrailer::SPACE); + return crc == saved_crc; + } void set_header() { set_flag_bit(FLAG_SHIFT_HEADER); } @@ -143,6 +157,14 @@ class CompressionFile : public VirtualReadOnlyFile { set_flag_bit(FLAG_SHIFT_HEADER_OVERWRITE); } + void set_digest_enable() { + set_flag_bit(FLAG_SHIFT_CALC_DIGEST); + } + + void set_compress_index() { + set_flag_bit(FLAG_SHIFT_IDX_COMP); + } + void set_compress_option(const CompressOptions &opt) { this->opt = opt; } @@ -151,7 +173,8 @@ class CompressionFile : public VirtualReadOnlyFile { uint64_t index_offset; // in bytes uint64_t index_size; // # of SegmentMappings uint64_t original_file_size; - 
uint64_t reserved_0; + uint32_t index_crc; + uint32_t reserved_0; // offset 72 CompressOptions opt; @@ -181,7 +204,8 @@ class CompressionFile : public VirtualReadOnlyFile { return deltas.size(); } - int build(const uint32_t *ibuf, size_t n, off_t offset_begin, uint32_t block_size) { + int build(const uint32_t *ibuf, size_t n, off_t offset_begin, uint32_t block_size, + bool enable_crc) { partial_offset.clear(); deltas.clear(); group_size = (uinttype_max + 1) / block_size; @@ -190,7 +214,11 @@ class CompressionFile : public VirtualReadOnlyFile { auto raw_offset = offset_begin; partial_offset.push_back(raw_offset); deltas.push_back(0); + size_t min_blksize = (enable_crc ? sizeof(uint32_t) : 0); for (ssize_t i = 1; i < (ssize_t)n + 1; i++) { + if (ibuf[i - 1] <= min_blksize) { + LOG_ERRNO_RETURN(EIO, -1, "unexpected block size(id: `):", i - 1, ibuf[i - 1]); + } raw_offset += ibuf[i - 1]; if ((i % group_size) == 0) { partial_offset.push_back(raw_offset); @@ -199,7 +227,7 @@ class CompressionFile : public VirtualReadOnlyFile { } if ((uint64_t)deltas[i - 1] + ibuf[i - 1] >= (uint64_t)uinttype_max) { LOG_ERROR_RETURN(ERANGE, -1, "build block[`] length failed `+` > ` (exceed)", - i-1, deltas[i-1], ibuf[i-1], (uint64_t)uinttype_max); + i - 1, deltas[i - 1], ibuf[i - 1], (uint64_t)uinttype_max); } deltas.push_back(deltas[i - 1] + ibuf[i - 1]); } @@ -260,13 +288,14 @@ class CompressionFile : public VirtualReadOnlyFile { LOG_WARN("trim and reload. (idx: `, offset: `, len: `)", idx, begin_offset, read_size); int trim_res = m_zfile->m_file->trim(begin_offset, read_size); if (trim_res < 0) { - LOG_ERRNO_RETURN(0, -1, "trim block failed. (idx: `, offset: `, len: `)", - idx, begin_offset, read_size); + LOG_ERRNO_RETURN(0, -1, "trim block failed. (idx: `, offset: `, len: `)", idx, + begin_offset, read_size); } auto readn = m_zfile->m_file->pread(m_buf + m_buf_offset, read_size, begin_offset); if (readn != (ssize_t)read_size) { - LOG_ERRNO_RETURN(0, -1, "read compressed blocks failed. 
(idx: `, offset: `, len: `)", - idx, begin_offset, read_size); + LOG_ERRNO_RETURN(0, -1, + "read compressed blocks failed. (idx: `, offset: `, len: `)", idx, + begin_offset, read_size); } return 0; } @@ -351,8 +380,9 @@ class CompressionFile : public VirtualReadOnlyFile { compressed_size = m_reader->compressed_size(); if ((size_t)(m_reader->m_buf_offset) + compressed_size > sizeof(m_buf)) { m_reader->m_eno = ERANGE; - LOG_ERRNO_RETURN(0, -1, "inner buffer offset (`) + compressed size (`) overflow.", - m_reader->m_buf_offset, compressed_size); + LOG_ERRNO_RETURN(0, -1, + "inner buffer offset (`) + compressed size (`) overflow.", + m_reader->m_buf_offset, compressed_size); } if (blk_idx == m_reader->m_begin_idx) { @@ -439,15 +469,15 @@ class CompressionFile : public VirtualReadOnlyFile { if (count <= 0) return 0; if (offset + count > m_ht.original_file_size) { - LOG_ERRNO_RETURN(ERANGE, -1, "pread range exceed (` > `)", - offset + count, m_ht.original_file_size); + LOG_ERRNO_RETURN(ERANGE, -1, "pread range exceed (` > `)", offset + count, + m_ht.original_file_size); } ssize_t readn = 0; // final will equal to count unsigned char raw[MAX_READ_SIZE]; BlockReader br(this, offset, count); for (auto &block : br) { if (buf == nullptr) { - //used for prefetch; no copy, no decompress; + // used for prefetch; no copy, no decompress; readn += block.cp_len; continue; } @@ -506,7 +536,7 @@ static int write_header_trailer(IFile *file, bool is_header, bool is_sealed, boo CompressionFile::HeaderTrailer *pht, off_t offset = -1); ssize_t compress_data(ICompressor *compressor, const unsigned char *buf, size_t count, - unsigned char *dest_buf, size_t dest_len, bool gen_crc) { + unsigned char *dest_buf, size_t dest_len, bool gen_crc) { ssize_t compressed_len = 0; auto ret = compressor->compress((const unsigned char *)buf, count, dest_buf, dest_len); @@ -584,6 +614,7 @@ class ZFileBuilder : public VirtualReadOnlyFile { LOG_ERRNO_RETURN(0, -1, "failed to write index."); } auto pht = 
(CompressionFile::HeaderTrailer *)m_ht; + pht->index_crc = crc32::crc32c(&m_block_len[0], index_bytes); pht->index_offset = index_offset; pht->index_size = index_size; pht->original_file_size = raw_data_size; @@ -674,7 +705,9 @@ bool load_jump_table(IFile *file, CompressionFile::HeaderTrailer *pheader_traile if (!pht->verify_magic() || !pht->is_header()) { LOG_ERROR_RETURN(0, false, "header magic/type don't match"); } - + if (pht->is_valid() == false) { + LOG_ERROR_RETURN(0, false, "digest verification failed."); + } struct stat stat; ret = file->fstat(&stat); if (ret < 0) { @@ -720,12 +753,20 @@ bool load_jump_table(IFile *file, CompressionFile::HeaderTrailer *pheader_traile if (ret < (ssize_t)index_bytes) { LOG_ERRNO_RETURN(0, false, "failed to read index"); } + if (pht->is_digest_enabled()) { + LOG_INFO("check jumptable CRC32 (` expected)", pht->index_crc); + auto crc = crc32::crc32c(ibuf.get(), index_bytes); + if (crc != pht->index_crc) { + LOG_ERRNO_RETURN(0, false, "checksum of jumptable is incorrect"); + } + } ret = jump_table.build(ibuf.get(), pht->index_size, - CompressionFile::HeaderTrailer::SPACE + pht->opt.dict_size, - pht->opt.block_size); + CompressionFile::HeaderTrailer::SPACE + pht->opt.dict_size, + pht->opt.block_size, pht->opt.verify); if (ret != 0) { LOG_ERRNO_RETURN(0, false, "failed to build jump table"); } + if (pheader_trailer) *pheader_trailer = *pht; return true; @@ -745,8 +786,7 @@ IFile *zfile_open_ro(IFile *file, bool verify, bool ownership) { auto res = file->fallocate(0, 0, -1); LOG_ERROR("failed to load jump table, fallocate result: `", res); if (res < 0) { - LOG_ERRNO_RETURN(0, nullptr, - "failed to load jump table and failed to evict"); + LOG_ERRNO_RETURN(0, nullptr, "failed to load jump table and failed to evict"); } if (retry--) { LOG_INFO("retry loading jump table"); @@ -787,7 +827,10 @@ static int write_header_trailer(IFile *file, bool is_header, bool is_sealed, boo if (offset != -1) pht->set_header_overwrite(); - 
LOG_INFO("pht->opt.dict_size: `", pht->opt.dict_size); + pht->set_digest_enable(); // by default + pht->digest = 0; + pht->digest = crc32::crc32c(pht, CompressionFile::HeaderTrailer::SPACE); + LOG_INFO("save header/trailer with digest: `", pht->digest); if (offset == -1) { return (int)file->write(pht, CompressionFile::HeaderTrailer::SPACE); } @@ -812,7 +855,6 @@ int zfile_compress(IFile *file, IFile *as, const CompressArgs *args) { char buf[CompressionFile::HeaderTrailer::SPACE] = {}; auto pht = new (buf) CompressionFile::HeaderTrailer; pht->set_compress_option(opt); - LOG_INFO("write header."); auto ret = write_header_trailer(as, true, false, true, pht); if (ret < 0) { @@ -884,6 +926,7 @@ int zfile_compress(IFile *file, IFile *as, const CompressArgs *args) { if (as->write(&block_len[0], index_bytes) != index_bytes) { LOG_ERRNO_RETURN(0, -1, "failed to write index."); } + pht->index_crc = crc32::crc32c(&block_len[0], index_bytes); pht->index_offset = index_offset; pht->index_size = index_size; pht->original_file_size = raw_data_size; @@ -962,6 +1005,10 @@ int is_zfile(IFile *file) { LOG_DEBUG("file: ` is not a zfile object", file); return 0; } + if (!pht->is_valid()) { + LOG_ERRNO_RETURN(0, -1, + "file: ` is a zfile object but verify digest failed.", file); + } LOG_DEBUG("file: ` is a zfile object", file); return 1; } From a13e1320a93b45e60a555600b62cb4e143286818 Mon Sep 17 00:00:00 2001 From: Yifan Yuan Date: Tue, 19 Sep 2023 16:51:10 +0800 Subject: [PATCH 09/31] print digest and index checksum in ZFile log Signed-off-by: Yifan Yuan --- src/overlaybd/zfile/zfile.cpp | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/overlaybd/zfile/zfile.cpp b/src/overlaybd/zfile/zfile.cpp index 7b233bf9..b2ab970e 100644 --- a/src/overlaybd/zfile/zfile.cpp +++ b/src/overlaybd/zfile/zfile.cpp @@ -128,11 +128,15 @@ class CompressionFile : public VirtualReadOnlyFile { return get_flag_bit(FLAG_SHIFT_CALC_DIGEST); } bool is_valid() { - 
if (!is_digest_enabled()) return true; + if (!is_digest_enabled()) { + LOG_WARN("digest not found in current zfile."); + return true; + } auto saved_crc = this->digest; this->digest = 0; DEFER(this->digest = saved_crc;); auto crc = crc32::crc32c(this, CompressionFile::HeaderTrailer::SPACE); + LOG_INFO("zfile digest: ` (` expected)", HEX(crc).width(8), HEX(saved_crc).width(8)); return crc == saved_crc; } void set_header() { @@ -490,8 +494,8 @@ class CompressionFile : public VirtualReadOnlyFile { int reload_res = block.reload(); LOG_ERROR( "checksum failed {offset: `, length: `} (expected ` but got `), reload result: `", - block.m_reader->m_buf_offset, block.compressed_size, block.crc32_code(), - c, reload_res); + block.m_reader->m_buf_offset, block.compressed_size, HEX(block.crc32_code()).width(8), + HEX(c).width(8), reload_res); if (reload_res < 0) { LOG_ERROR_RETURN(ECHECKSUM, -1, "checksum verification and reload failed"); @@ -754,10 +758,12 @@ bool load_jump_table(IFile *file, CompressionFile::HeaderTrailer *pheader_traile LOG_ERRNO_RETURN(0, false, "failed to read index"); } if (pht->is_digest_enabled()) { - LOG_INFO("check jumptable CRC32 (` expected)", pht->index_crc); + LOG_INFO("check jumptable CRC32 (` expected)", HEX(pht->index_crc).width(8)); auto crc = crc32::crc32c(ibuf.get(), index_bytes); if (crc != pht->index_crc) { - LOG_ERRNO_RETURN(0, false, "checksum of jumptable is incorrect"); + LOG_ERRNO_RETURN(0, false, "checksum of jumptable is incorrect. 
{got: `, expected: `}", + HEX(crc).width(8), HEX(pht->index_crc).width(8) + ); } } ret = jump_table.build(ibuf.get(), pht->index_size, @@ -800,8 +806,8 @@ IFile *zfile_open_ro(IFile *file, bool verify, bool ownership) { zfile->m_jump_table = std::move(jump_table); CompressArgs args(ht.opt); ht.opt.verify = ht.opt.verify && verify; - LOG_DEBUG("compress type: `, bs: `, verify_checksum: `", ht.opt.algo, ht.opt.block_size, - ht.opt.verify); + LOG_INFO("digest: `, compress type: `, bs: `, data_verify: `", + HEX(ht.digest).width(8), ht.opt.algo, ht.opt.block_size, ht.opt.verify); zfile->m_compressor.reset(create_compressor(&args)); zfile->m_ownership = ownership; @@ -830,7 +836,7 @@ static int write_header_trailer(IFile *file, bool is_header, bool is_sealed, boo pht->set_digest_enable(); // by default pht->digest = 0; pht->digest = crc32::crc32c(pht, CompressionFile::HeaderTrailer::SPACE); - LOG_INFO("save header/trailer with digest: `", pht->digest); + LOG_INFO("save header/trailer with digest: `", HEX(pht->digest).width(8)); if (offset == -1) { return (int)file->write(pht, CompressionFile::HeaderTrailer::SPACE); } @@ -907,12 +913,12 @@ int zfile_compress(IFile *file, IFile *as, const CompressArgs *args) { if (crc32_verify) { auto crc32_code = crc32c(&compressed_data[j * buf_size], compressed_len[j]); LOG_DEBUG("append ` bytes crc32_code: {offset: `, count: `, crc32: `}", - sizeof(uint32_t), moffset, compressed_len[j], crc32_code); + sizeof(uint32_t), moffset, compressed_len[j], HEX(crc32_code).width(8)); compressed_len[j] += sizeof(uint32_t); ret = as->write(&crc32_code, sizeof(uint32_t)); if (ret < (ssize_t)sizeof(uint32_t)) { LOG_ERRNO_RETURN(0, -1, "failed to write crc32code, offset: `, crc32: `", - moffset, crc32_code); + moffset, HEX(crc32_code).width(8)); } } block_len.push_back(compressed_len[j]); @@ -927,6 +933,7 @@ int zfile_compress(IFile *file, IFile *as, const CompressArgs *args) { LOG_ERRNO_RETURN(0, -1, "failed to write index."); } pht->index_crc = 
crc32::crc32c(&block_len[0], index_bytes); + LOG_INFO("index checksum: `", HEX(pht->index_crc).width(8)); pht->index_offset = index_offset; pht->index_size = index_size; pht->original_file_size = raw_data_size; @@ -957,7 +964,7 @@ int zfile_decompress(IFile *src, IFile *dst) { for (off_t offset = 0; offset < raw_data_size; offset += block_size) { auto len = (ssize_t)std::min(block_size, (size_t)raw_data_size - offset); auto readn = file->pread(raw_buf.get(), len, offset); - LOG_DEBUG("readn: `, crc32: `", readn, crc32c(raw_buf.get(), len)); + LOG_DEBUG("readn: `, crc32: `", readn, HEX(crc32c(raw_buf.get(), len)).width(8)); if (readn != len) return -1; if (dst->write(raw_buf.get(), readn) != readn) { From 8e4726ccea1d9c6caf1686206b364da7561a7de5 Mon Sep 17 00:00:00 2001 From: "zhuangbowei.zbw" Date: Fri, 22 Sep 2023 17:39:25 +0800 Subject: [PATCH 10/31] [bugfix] turboOCI commit with uuid Signed-off-by: zhuangbowei.zbw --- src/overlaybd/lsmt/file.cpp | 5 +++ src/overlaybd/lsmt/test/lsmt-filetest.h | 5 +-- src/overlaybd/lsmt/test/test.cpp | 42 +++++++++++++++++++++++-- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/src/overlaybd/lsmt/file.cpp b/src/overlaybd/lsmt/file.cpp index 12ec4e16..f76004d4 100644 --- a/src/overlaybd/lsmt/file.cpp +++ b/src/overlaybd/lsmt/file.cpp @@ -1127,6 +1127,11 @@ class LSMTWarpFile : public LSMTFile { CompactOptions opts(&m_files, mapping.get(), m_index->size(), m_vsize, &args); LayerInfo info; info.virtual_size = m_vsize; + info.uuid.clear(); + if (UUID::String::is_valid((args.uuid).c_str())) { + LOG_INFO("set UUID: `", args.uuid.data); + info.uuid.parse(args.uuid); + } if (UUID::String::is_valid((args.parent_uuid).c_str())) { LOG_INFO("set parent UUID: `", args.parent_uuid.data); info.parent_uuid.parse(args.parent_uuid); diff --git a/src/overlaybd/lsmt/test/lsmt-filetest.h b/src/overlaybd/lsmt/test/lsmt-filetest.h index 053c5f83..384b9b5b 100644 --- a/src/overlaybd/lsmt/test/lsmt-filetest.h +++ 
b/src/overlaybd/lsmt/test/lsmt-filetest.h @@ -408,6 +408,7 @@ class FileTest2 : public FileTest { data_name.clear(); idx_name.clear(); layer_name.clear(); + parent_uuid = ""; } }; @@ -571,6 +572,6 @@ class WarpFileTest : public FileTest3 { public: void randwrite_warpfile(IFile *file, size_t nwrites); IFileRW *create_warpfile_rw(int io_engine = 0); - IFileRO *create_commit_warpfile(int io_engine = 0); - IFileRO *create_commit_warpfile(IFileRW* warpfile); + IFileRO *create_commit_warpfile(int io_engine = 0, bool keepUUID = false); + IFileRO *create_commit_warpfile(IFileRW* warpfile, bool keepUUID = false); }; diff --git a/src/overlaybd/lsmt/test/test.cpp b/src/overlaybd/lsmt/test/test.cpp index 6a730b2d..c69bfb35 100644 --- a/src/overlaybd/lsmt/test/test.cpp +++ b/src/overlaybd/lsmt/test/test.cpp @@ -785,13 +785,19 @@ IFileRW *WarpFileTest::create_warpfile_rw(int io_engine) { return file; } -IFileRO *WarpFileTest::create_commit_warpfile(IFileRW* warpfile) { +IFileRO *WarpFileTest::create_commit_warpfile(IFileRW* warpfile, bool keepUUID) { LOG_INFO("commit warpfile as `", layer_name.back().c_str()); auto fcommit = lfs->open(layer_name.back().c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRWXU); UUID uu; uu.parse(parent_uuid.c_str(), parent_uuid.size()); CommitArgs c(fcommit); c.parent_uuid = uu; + + // commit will clear layer's uuid if CommitArgs.uuid is not specify + if (keepUUID) { + warpfile->get_uuid(uu); + c.uuid = uu; + } ((IFileRW *)warpfile)->commit(c); delete warpfile; fcommit->close(); @@ -802,10 +808,10 @@ IFileRO *WarpFileTest::create_commit_warpfile(IFileRW* warpfile) { return ret; } -IFileRO *WarpFileTest::create_commit_warpfile(int io_engine) { +IFileRO *WarpFileTest::create_commit_warpfile(int io_engine, bool keepUUID) { auto warpfile = create_warpfile_rw(io_engine); randwrite_warpfile(warpfile, FLAGS_nwrites); - return create_commit_warpfile(warpfile); + return create_commit_warpfile(warpfile, keepUUID); } TEST_F(WarpFileTest, randwrite) { @@ -920,6 +926,36 
@@ TEST_F(WarpFileTest, stack_files) { fcheck = nullptr; } +TEST_F(WarpFileTest, commit_without_uuid) { + CleanUp(); + UUID uu; + auto frw = create_warpfile_rw(ut_io_engine); + frw->get_uuid(uu); + LOG_INFO("uuid before commit is `", uu); + EXPECT_FALSE(uu.is_null()); + + auto fcommit = create_commit_warpfile(frw); + fcommit->get_uuid(uu); + LOG_INFO("uuid after commit is `", uu); + EXPECT_TRUE(uu.is_null()); +} + +TEST_F(WarpFileTest, commit_with_uuid) { + CleanUp(); + UUID uu; + auto frw = create_warpfile_rw(ut_io_engine); + frw->get_uuid(uu); + LOG_INFO("uuid before commit is `", uu); + EXPECT_FALSE(uu.is_null()); + + UUID uu_commit; + auto fcommit = create_commit_warpfile(frw, true); + fcommit->get_uuid(uu_commit); + LOG_INFO("uuid after commit is `", uu); + EXPECT_FALSE(uu_commit.is_null()); + EXPECT_EQ(uu, uu_commit); +} + int main(int argc, char **argv) { auto seed = 154574045; From 1c2299c3f9ecdc09e6287734dc10486f73e215bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=B8=A3=E8=88=9F?= Date: Tue, 26 Sep 2023 15:32:35 +0800 Subject: [PATCH 11/31] build overlaybd using static compiled libcurl 7.42.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 鸣舟 --- CMake/FindCURL.cmake | 54 ++++++++++++++++++++++++++++++++++++++++++++ CMakeLists.txt | 2 +- 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 CMake/FindCURL.cmake diff --git a/CMake/FindCURL.cmake b/CMake/FindCURL.cmake new file mode 100644 index 00000000..3b8c241f --- /dev/null +++ b/CMake/FindCURL.cmake @@ -0,0 +1,54 @@ +include(FetchContent) + +if(${BUILD_CURL_FROM_SOURCE}) + message("Add and build standalone libcurl") + include(FetchContent) + FetchContent_Declare( + curl_bundle + GIT_REPOSITORY https://github.com/curl/curl.git + GIT_TAG curl-7_42_1 + GIT_PROGRESS 1) + + FetchContent_GetProperties(curl_bundle) + + # In libcurl, CMakeLists build static lib is broken add build command via + # make + if(NOT TARGET libcurl_static_build) + 
if (NOT curl_bundle_POPULATED) + FetchContent_Populate(curl_bundle) + endif() + add_custom_command( + OUTPUT ${curl_bundle_BINARY_DIR}/lib/libcurl.a + WORKING_DIRECTORY ${curl_bundle_SOURCE_DIR} + COMMAND + autoreconf -i && sh configure --with-openssl --without-libssh2 + --enable-static --enable-shared=no --enable-optimize + --enable-symbol-hiding --disable-manual --without-libidn + --prefix=${curl_bundle_BINARY_DIR} && make -j && make install) + add_custom_target(libcurl_static_build + DEPENDS ${curl_bundle_BINARY_DIR}/lib/libcurl.a) + endif() + + set(CURL_FOUND yes) + set(CURL_LIBRARY ${curl_bundle_BINARY_DIR}/lib/libcurl.a) + set(CURL_THIRDPARTY_DEPS crypto ssl z) + set(CURL_LIBRARIES ${CURL_LIBRARY} ${CURL_THIRDPARTY_DEPS}) + set(CURL_INCLUDE_DIR ${curl_bundle_BINARY_DIR}/include) + set(CURL_INCLUDE_DIRS ${CURL_INCLUDE_DIR}) + set(CURL_VERSION_STRING 7.42.1) + + # Use libcurl static lib instead of cmake defined shared lib + if(NOT TARGET CURL::libcurl) + add_library(CURL::libcurl UNKNOWN IMPORTED) + endif() + add_dependencies(CURL::libcurl libcurl_static_build) + message("${CURL_LIBRARY}") + set_target_properties( + CURL::libcurl + PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${CURL_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${CURL_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${CURL_THIRDPARTY_DEPS}") +else() + include(${CMAKE_ROOT}/Modules/FindCURL.cmake) +endif() diff --git a/CMakeLists.txt b/CMakeLists.txt index 798f86ea..51fbac88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,7 +39,7 @@ endif() set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED on) set(ENABLE_MIMIC_VDSO off) - +option(BUILD_CURL_FROM_SOURCE "Compile static libcurl" on) find_package(photon REQUIRED) find_package(tcmu REQUIRED) From 6e105e38a5b5370b5b7cf9df5f11625a07694be9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=B8=A3=E8=88=9F?= Date: Wed, 27 Sep 2023 14:21:05 +0800 Subject: [PATCH 12/31] fix workflow release dependencies MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 鸣舟 --- .github/workflows/release/build.sh | 6 +++--- CMake/FindCURL.cmake | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release/build.sh b/.github/workflows/release/build.sh index 3a5acfac..608ed69f 100644 --- a/.github/workflows/release/build.sh +++ b/.github/workflows/release/build.sh @@ -50,11 +50,11 @@ elif [[ ${OS} =~ "centos" ]]; then fi yum install -y epel-release libaio-devel libcurl-devel openssl-devel libnl3-devel e2fsprogs-devel - yum install -y rpm-build make git wget sudo - yum install --skip-broken -y libzstd-static libzstd-devel + yum install -y rpm-build make git wget sudo autoconf automake libtool + yum install --skip-broken -y libzstd-static gcc gcc-c++ binutils libzstd-devel elif [[ ${OS} =~ "mariner" ]]; then yum install -y libaio-devel libcurl-devel openssl-devel libnl3-devel e2fsprogs-devel glibc-devel libzstd-devel binutils ca-certificates-microsoft build-essential - yum install -y rpm-build make git wget sudo tar gcc gcc-c++ + yum install -y rpm-build make git wget sudo tar gcc gcc-c++ autoconf automake libtool DISTRO=${OS/:/.} PACKAGE_RELEASE="-DPACKAGE_RELEASE=1.${DISTRO}" diff --git a/CMake/FindCURL.cmake b/CMake/FindCURL.cmake index 3b8c241f..56ed0b76 100644 --- a/CMake/FindCURL.cmake +++ b/CMake/FindCURL.cmake @@ -21,9 +21,18 @@ if(${BUILD_CURL_FROM_SOURCE}) OUTPUT ${curl_bundle_BINARY_DIR}/lib/libcurl.a WORKING_DIRECTORY ${curl_bundle_SOURCE_DIR} COMMAND + export CC=${CMAKE_C_COMPILER} && + export CXX=${CMAKE_CXX_COMPILER} && + export LD=${CMAKE_LINKER} && + export CFLAGS=-fPIC && autoreconf -i && sh configure --with-openssl --without-libssh2 --enable-static --enable-shared=no --enable-optimize --enable-symbol-hiding --disable-manual --without-libidn + --disable-ftp --disable-file --disable-ldap --disable-ldaps + --disable-rtsp --disable-dict --disable-telnet --disable-tftp + --disable-pop3 
--disable-imap --disable-smb --disable-smtp + --disable-gopher --without-nghttp2 --enable-http + --with-pic=PIC --prefix=${curl_bundle_BINARY_DIR} && make -j && make install) add_custom_target(libcurl_static_build DEPENDS ${curl_bundle_BINARY_DIR}/lib/libcurl.a) From be948faeb189d0243587d4fc9acfeb2c9e0c160a Mon Sep 17 00:00:00 2001 From: liulanzheng Date: Sun, 8 Oct 2023 13:52:33 +0800 Subject: [PATCH 13/31] fix segfault in open lower fix segfault when failed to open remote layer for image with acceleration layer Signed-off-by: liulanzheng --- src/image_file.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/image_file.cpp b/src/image_file.cpp index c2e2baec..23e27107 100644 --- a/src/image_file.cpp +++ b/src/image_file.cpp @@ -278,6 +278,10 @@ int ImageFile::open_lower_layer(IFile *&file, ImageConfigNS::LayerConfig &layer, } } + if (file == nullptr) { + return -1; + } + if (m_prefetcher != nullptr) { file = m_prefetcher->new_prefetch_file(file, index); } From 7f5594d6cd0f3bd198859fb5984620ee5d866d82 Mon Sep 17 00:00:00 2001 From: Coldwings Date: Mon, 9 Oct 2023 15:51:21 +0800 Subject: [PATCH 14/31] build static openssl when building static libcurl Signed-off-by: Coldwings --- CMake/FindCURL.cmake | 13 +++++--- CMake/FindOpenSSL.cmake | 67 +++++++++++++++++++++++++++++++++++++++++ CMake/Findphoton.cmake | 7 +++++ CMakeLists.txt | 2 +- README.md | 8 +++++ 5 files changed, 91 insertions(+), 6 deletions(-) create mode 100644 CMake/FindOpenSSL.cmake diff --git a/CMake/FindCURL.cmake b/CMake/FindCURL.cmake index 56ed0b76..ab4e44b4 100644 --- a/CMake/FindCURL.cmake +++ b/CMake/FindCURL.cmake @@ -17,6 +17,7 @@ if(${BUILD_CURL_FROM_SOURCE}) if (NOT curl_bundle_POPULATED) FetchContent_Populate(curl_bundle) endif() + find_package(OpenSSL) add_custom_command( OUTPUT ${curl_bundle_BINARY_DIR}/lib/libcurl.a WORKING_DIRECTORY ${curl_bundle_SOURCE_DIR} @@ -25,22 +26,24 @@ if(${BUILD_CURL_FROM_SOURCE}) export CXX=${CMAKE_CXX_COMPILER} && export LD=${CMAKE_LINKER} && 
export CFLAGS=-fPIC && - autoreconf -i && sh configure --with-openssl --without-libssh2 - --enable-static --enable-shared=no --enable-optimize - --enable-symbol-hiding --disable-manual --without-libidn + autoreconf -i && sh configure --with-ssl="${OPENSSL_ROOT_DIR}" + --without-libssh2 --enable-static --enable-shared=no --enable-optimize + --disable-manual --without-libidn --disable-ftp --disable-file --disable-ldap --disable-ldaps --disable-rtsp --disable-dict --disable-telnet --disable-tftp --disable-pop3 --disable-imap --disable-smb --disable-smtp --disable-gopher --without-nghttp2 --enable-http --with-pic=PIC - --prefix=${curl_bundle_BINARY_DIR} && make -j && make install) + --prefix="${curl_bundle_BINARY_DIR}" && make -j 8 && make install) add_custom_target(libcurl_static_build DEPENDS ${curl_bundle_BINARY_DIR}/lib/libcurl.a) + add_dependencies(libcurl_static_build openssl102_static_build) + make_directory(${curl_bundle_BINARY_DIR}/include) endif() set(CURL_FOUND yes) set(CURL_LIBRARY ${curl_bundle_BINARY_DIR}/lib/libcurl.a) - set(CURL_THIRDPARTY_DEPS crypto ssl z) + set(CURL_THIRDPARTY_DEPS OpenSSL::SSL OpenSSL::Crypto z) set(CURL_LIBRARIES ${CURL_LIBRARY} ${CURL_THIRDPARTY_DEPS}) set(CURL_INCLUDE_DIR ${curl_bundle_BINARY_DIR}/include) set(CURL_INCLUDE_DIRS ${CURL_INCLUDE_DIR}) diff --git a/CMake/FindOpenSSL.cmake b/CMake/FindOpenSSL.cmake new file mode 100644 index 00000000..02e55b9d --- /dev/null +++ b/CMake/FindOpenSSL.cmake @@ -0,0 +1,67 @@ +include(FetchContent) + +if(${BUILD_CURL_FROM_SOURCE}) + message("Add and build standalone libopenssl") + include(FetchContent) + + # make openssl into bundle + FetchContent_Declare( + openssl102 + GIT_REPOSITORY https://github.com/openssl/openssl.git + GIT_TAG OpenSSL_1_0_2-stable + GIT_PROGRESS 1) + + FetchContent_GetProperties(openssl102) + + if(NOT TARGET openssl102_static_build) + if(NOT openssl102_POPULATED) + FetchContent_Populate(openssl102) + endif() + add_custom_command( + OUTPUT 
${openssl102_BINARY_DIR}/lib/libssl.a + WORKING_DIRECTORY ${openssl102_SOURCE_DIR} + COMMAND + sh config -fPIC no-unit-test no-shared + --openssldir="${openssl102_BINARY_DIR}" + --prefix="${openssl102_BINARY_DIR}" && make depend -j && make + -j 8 && make install) + add_custom_target(openssl102_static_build + DEPENDS ${openssl102_BINARY_DIR}/lib/libssl.a) + make_directory(${openssl102_BINARY_DIR}/include) + endif() + + set(OPENSSL_FOUND yes) + set(OPENSSL_ROOT_DIR ${openssl102_BINARY_DIR}) + set(OPENSSL_INCLUDE_DIR ${OPENSSL_ROOT_DIR}/include) + set(OPENSSL_INCLUDE_DIRS ${OPENSSL_INCLUDE_DIR}) + set(OPENSSL_SSL_LIBRARY ${OPENSSL_ROOT_DIR}/lib/libssl.a) + set(OPENSSL_SSL_LIBRARIES ${OPENSSL_SSL_LIBRARY}) + set(OPENSSL_CRYPTO_LIBRARY ${OPENSSL_ROOT_DIR}/lib/libcrypto.a) + set(OPENSSL_CRYPTO_LIBRARIES ${OPENSSL_CRYPTO_LIBRARY}) + set(OPENSSL_LINK_DIR ${OPENSSL_ROOT_DIR}/lib) + set(OPENSSL_LINK_DIRS ${OPENSSL_LINK_DIR}) + + if(NOT TARGET OpenSSL::SSL) + add_library(OpenSSL::SSL STATIC IMPORTED) + add_dependencies(OpenSSL::SSL openssl102_static_build) + set_target_properties( + OpenSSL::SSL + PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${OPENSSL_SSL_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${OPENSSL_SSL_LIBRARY}") + endif() + + if(NOT TARGET OpenSSL::Crypto) + add_library(OpenSSL::Crypto STATIC IMPORTED) + add_dependencies(OpenSSL::Crypto openssl102_static_build) + set_target_properties( + OpenSSL::Crypto + PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${OPENSSL_CRYPTO_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}") + endif() +else() + include(${CMAKE_ROOT}/Modules/FindOpenSSL.cmake) +endif() diff --git a/CMake/Findphoton.cmake b/CMake/Findphoton.cmake index 11d3bd31..203ada89 100644 --- a/CMake/Findphoton.cmake +++ b/CMake/Findphoton.cmake @@ -14,4 +14,11 @@ if(BUILD_TESTING) else() 
FetchContent_MakeAvailable(photon) endif() + +if (BUILD_CURL_FROM_SOURCE) + find_package(OpenSSL REQUIRED) + find_package(CURL REQUIRED) + add_dependencies(photon_obj CURL::libcurl OpenSSL::SSL OpenSSL::Crypto) +endif() + set(PHOTON_INCLUDE_DIR ${photon_SOURCE_DIR}/include/) diff --git a/CMakeLists.txt b/CMakeLists.txt index 51fbac88..82650611 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,7 +39,7 @@ endif() set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED on) set(ENABLE_MIMIC_VDSO off) -option(BUILD_CURL_FROM_SOURCE "Compile static libcurl" on) +option(BUILD_CURL_FROM_SOURCE "Compile static libcurl" off) find_package(photon REQUIRED) find_package(tcmu REQUIRED) diff --git a/README.md b/README.md index be15549c..398fc069 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,14 @@ make -j sudo make install ``` +Because some libcurl and OpenSSL releases have incompatible API changes, you can build known-compatible versions of libcurl and OpenSSL from source and link them into the executables as static libraries by enabling `BUILD_CURL_FROM_SOURCE` (see the command below). + +Note that building libcurl and OpenSSL from source requires `autoconf`, `automake`, and `libtool`. + +```bash +cmake -D BUILD_CURL_FROM_SOURCE=1 .. +``` + If you want to use the [original libext2fs](https://github.com/tytso/e2fsprogs) instead of our [customized libext2fs](https://github.com/data-accelerator/e2fsprogs).
```bash From 06e164ddec57f1033f5484612de691c18d172649 Mon Sep 17 00:00:00 2001 From: liulanzheng Date: Thu, 12 Oct 2023 17:11:20 +0800 Subject: [PATCH 15/31] update photon version Signed-off-by: liulanzheng --- CMake/Findphoton.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMake/Findphoton.cmake b/CMake/Findphoton.cmake index 203ada89..471ce598 100644 --- a/CMake/Findphoton.cmake +++ b/CMake/Findphoton.cmake @@ -4,7 +4,7 @@ set(FETCHCONTENT_QUIET false) FetchContent_Declare( photon GIT_REPOSITORY https://github.com/alibaba/PhotonLibOS.git - GIT_TAG v0.6.2 + GIT_TAG v0.6.10 ) if(BUILD_TESTING) From faa421c588ecb54f245a9836f4cc8631f9c8469c Mon Sep 17 00:00:00 2001 From: "lanzheng.liulz" Date: Fri, 13 Oct 2023 15:31:43 +0800 Subject: [PATCH 16/31] update regitsryfs v2 and use v2 as default Signed-off-by: Lanzheng Liu --- CMake/Findphoton.cmake | 2 +- README.md | 2 +- src/config.h | 2 +- src/overlaybd/registryfs/registryfs.h | 7 +- src/overlaybd/registryfs/registryfs_v2.cpp | 373 ++++++++++++++++++++- 5 files changed, 366 insertions(+), 20 deletions(-) diff --git a/CMake/Findphoton.cmake b/CMake/Findphoton.cmake index 471ce598..90b7fdf2 100644 --- a/CMake/Findphoton.cmake +++ b/CMake/Findphoton.cmake @@ -4,7 +4,7 @@ set(FETCHCONTENT_QUIET false) FetchContent_Declare( photon GIT_REPOSITORY https://github.com/alibaba/PhotonLibOS.git - GIT_TAG v0.6.10 + GIT_TAG v0.6.11 ) if(BUILD_TESTING) diff --git a/README.md b/README.md index 398fc069..a85b3130 100644 --- a/README.md +++ b/README.md @@ -199,7 +199,7 @@ Default configure file `overlaybd.json` is installed to `/etc/overlaybd/`. | enableAudit | Enable audit or not. | | enableThread | Enable overlaybd device run in seprate thread or not. Note `cacheType` should be `ocf`. `false` is default. | | auditPath | The path for audit file, `/var/log/overlaybd-audit.log` is the default value. | -| registryFsVersion | registry client version, 'v1' libcurl based, 'v2' is photon http based. 
'v1' is the default value. | +| registryFsVersion | registry client version, 'v1' libcurl based, 'v2' is photon http based. 'v2' is the default value. | | prefetchConfig.concurrency | Prefetch concurrency for reloading trace, `16` is default | > NOTE: `download` is the config for background downloading. After an overlaybd device is lauched, a background task will be running to fetch the whole blobs into local directories. After downloading, I/O requests are directed to local files. Unlike other options, download config is reloaded when a device launching. diff --git a/src/config.h b/src/config.h index 127b006f..2d0f9db0 100644 --- a/src/config.h +++ b/src/config.h @@ -140,7 +140,7 @@ struct GlobalConfig : public ConfigUtils::Config { APPCFG_PARA(p2pConfig, P2PConfig); APPCFG_PARA(exporterConfig, ExporterConfig); APPCFG_PARA(auditPath, std::string, "/var/log/overlaybd-audit.log"); - APPCFG_PARA(registryFsVersion, std::string, "v1"); + APPCFG_PARA(registryFsVersion, std::string, "v2"); APPCFG_PARA(cacheConfig, CacheConfig); APPCFG_PARA(gzipCacheConfig, GzipCacheConfig); APPCFG_PARA(logConfig, LogConfig); diff --git a/src/overlaybd/registryfs/registryfs.h b/src/overlaybd/registryfs/registryfs.h index b3ad76b9..3414ff07 100644 --- a/src/overlaybd/registryfs/registryfs.h +++ b/src/overlaybd/registryfs/registryfs.h @@ -35,5 +35,10 @@ photon::fs::IFileSystem *new_registryfs_v1(PasswordCB callback, photon::fs::IFileSystem *new_registryfs_v2(PasswordCB callback, const char *caFile = nullptr, uint64_t timeout = -1); -} +photon::fs::IFile* new_registry_uploader(photon::fs::IFile *lfile, + std::string &upload_url, + std::string &username, std::string &password, + uint64_t timeout, + ssize_t upload_bs = -1); +} diff --git a/src/overlaybd/registryfs/registryfs_v2.cpp b/src/overlaybd/registryfs/registryfs_v2.cpp index 5b1dd590..5e783714 100644 --- a/src/overlaybd/registryfs/registryfs_v2.cpp +++ b/src/overlaybd/registryfs/registryfs_v2.cpp @@ -23,7 +23,7 @@ #include #include 
#include - +#include #include #include #include @@ -39,6 +39,8 @@ #include #include #include +#include +#include using namespace photon::fs; using namespace photon::net::http; @@ -52,9 +54,14 @@ static const uint64_t kMinimalMetaLife = 300L * 1000 * 1000; // actual_url lives using HTTP_OP = photon::net::http::Client::OperationOnStack<64 * 1024 - 1>; -static std::unordered_map str_to_kvmap(const estring &src) { +static std::unordered_map str_to_kvmap(estring &src) { + size_t pos = 0; + while ((pos = src.find("\",", pos)) != std::string::npos) { + src.replace(pos, 2, "\";"); + pos += 2; + } std::unordered_map ret; - for (const auto &token : src.split(',')) { + for (const auto &token : src.split(';')) { auto pos = token.find_first_of('='); auto key = token.substr(0, pos); auto val = token.substr(pos + 1).trim('\"'); @@ -127,9 +134,9 @@ class RegistryFSImpl_v2 : public RegistryFS { estring *actual_url = (estring*)&url; if (actual_info->mode == UrlMode::Redirect) actual_url = &actual_info->info; - //use p2p proxy + // use p2p proxy estring accelerate_url; - if(m_accelerate.size() > 0) { + if (m_accelerate.size() > 0) { accelerate_url = estring().appends(m_accelerate, "/", *actual_url); actual_url = &accelerate_url; LOG_DEBUG("p2p_url: `", *actual_url); @@ -187,8 +194,7 @@ class RegistryFSImpl_v2 : public RegistryFS { if (!scope.empty()) { token = m_scope_token.acquire(scope, [&]() -> estring * { estring *token = new estring(); - auto ret = m_callback(url.data()); - if (!authenticate(authurl, ret.first, ret.second, token, tmo.timeout())) { + if (get_token(url, authurl, *token, tmo.timeout()) < 0) { code = 401; delete token; return nullptr; @@ -236,6 +242,29 @@ class RegistryFSImpl_v2 : public RegistryFS { return 0; } + photon::net::http::Client* get_client() { + return m_client; + } + + void refresh_client() { + delete m_client; + m_client = new_http_client(); + } + + int refresh_token(const estring &url, estring &token) { + estring authurl, scope; + Timeout 
tmo(m_timeout); + if (get_scope_auth(url, &authurl, &scope, tmo.timeout(), true) < 0) + return -1; + if (!scope.empty()) { + get_token(url, authurl, token, tmo.timeout()); + if (token.empty()) { + LOG_ERROR_RETURN(0, -1, "Failed to get token"); + } + } + return 0; + } + protected: PasswordCB m_callback; estring m_accelerate; @@ -246,13 +275,18 @@ class RegistryFSImpl_v2 : public RegistryFS { ObjectCache m_scope_token; ObjectCache m_url_info; - int get_scope_auth(const estring &url, estring *authurl, estring *scope, uint64_t timeout) { + int get_scope_auth(const estring &url, estring *authurl, estring *scope, uint64_t timeout, + bool push = false) { Timeout tmo(timeout); - - HTTP_OP op(m_client, Verb::GET, url); + auto verb = push ? Verb::POST : Verb::GET; + HTTP_OP op(m_client, verb, url); op.follow = 0; op.retry = 0; - op.req.headers.range(0, 0); + if (!push) + op.req.headers.range(0, 0); + else + op.req.headers.insert("Content-Type", "application/octet-stream"); + op.timeout = tmo.timeout(); op.call(); if (op.status_code == -1) @@ -298,6 +332,15 @@ class RegistryFSImpl_v2 : public RegistryFS { return 0; } + int get_token(const estring &url, const estring &authurl, estring &token, uint64_t timeout) { + auto ret = m_callback(url.data()); + if (!authenticate(authurl, ret.first, ret.second, &token, timeout)) { + token = ""; + return -1; + } + return 0; + } + bool authenticate(const estring &authurl, std::string &username, std::string &password, estring *token, uint64_t timeout) { Timeout tmo(timeout); @@ -313,18 +356,18 @@ class RegistryFSImpl_v2 : public RegistryFS { op.timeout = tmo.timeout(); op.call(); if (op.status_code != 200) { - LOG_ERROR_RETURN(EPERM, false, "invalid key"); + LOG_ERROR_RETURN(EPERM, false, "invalid key, code=", op.status_code); } estring body; - body.resize(16 *1024); - auto len = op.resp.read((void*)body.data(), 16 *1024); + body.resize(16 * 1024); + auto len = op.resp.read((void*)body.data(), 16 * 1024); body.resize(len); if 
(op.status_code == 200 && parse_token(body, token) == 0) return true; LOG_ERROR_RETURN(EPERM, false, "auth failed, response code=` ", op.status_code, VALUE(authurl)); } -}; // namespace FileSystem +}; class RegistryFileImpl_v2 : public photon::fs::VirtualReadOnlyFile { public: @@ -340,7 +383,7 @@ class RegistryFileImpl_v2 : public photon::fs::VirtualReadOnlyFile { return m_fs; } - ssize_t preadv(const struct iovec *iov, int iovcnt, off_t offset) { + ssize_t preadv(const struct iovec *iov, int iovcnt, off_t offset) override { if (m_filesize == 0) { struct stat stat; auto stret = fstat(&stat); @@ -432,3 +475,301 @@ IFileSystem *new_registryfs_v2(PasswordCB callback, const char *caFile, uint64_t LOG_ERROR_RETURN(EINVAL, nullptr, "password callback not set"); return new RegistryFSImpl_v2(callback, caFile ? caFile : "", timeout); } + +class RegistryUploader : public VirtualFile { +public: + photon::semaphore m_sem, m_init_sem; + SHA256_CTX m_sha256_ctx = {0}; + std::string m_sha256sum; + std::thread m_upload_th; + IFile *m_local_file; + estring m_origin_upload_url, m_upload_url; + ssize_t m_upload_chunk_size = 128 * 1024 * 1024; + void *m_upload_buf; + off_t m_upload_pos = 0, m_write_pos = 0; + bool m_finished = false, m_failed = false; + RegistryFSImpl_v2 *m_upload_fs; + uint64_t m_http_client_ts = 0; + std::string m_username, m_password; + uint64_t m_timeout = -1; + estring m_token; + + RegistryUploader(IFile *lfile, std::string &upload_url, std::string &username, + std::string &password, uint64_t timeout = -1, ssize_t upload_bs = -1) + : m_local_file(lfile), m_origin_upload_url(upload_url), m_username(username), m_password(password), + m_timeout(timeout) { + if (upload_bs != -1) + m_upload_chunk_size = upload_bs; + SHA256_Init(&m_sha256_ctx); + } + + int init() { + LOG_INFO("init registry upload ", VALUE(m_username)); + m_upload_th = std::thread(&RegistryUploader::upload_thread, this); + m_init_sem.wait(1); + if (m_failed) { + m_upload_th.join(); + return -1; + } + 
return 0; + } + + ~RegistryUploader() { + } + + int fsync() override { + if (m_failed) { + m_upload_th.join(); + return -1; + } + // calc sha256 result + unsigned char sha[32]; + SHA256_Final(sha, &m_sha256_ctx); + char res[SHA256_DIGEST_LENGTH * 2 + 1]; /* one extra byte: sprintf writes a terminating NUL after the last hex pair */ + for (int i = 0; i < SHA256_DIGEST_LENGTH; i++) + sprintf(res + (i * 2), "%02x", sha[i]); + m_sha256sum = "sha256:" + std::string(res, SHA256_DIGEST_LENGTH * 2); + LOG_INFO(VALUE(m_sha256sum)); + + m_finished = true; + m_sem.signal(1); + m_upload_th.join(); + if (m_failed) { + return -1; + } + return 0; + } + + UNIMPLEMENTED(int fdatasync() override); + UNIMPLEMENTED(int close() override); + UNIMPLEMENTED(int fchmod(mode_t mode) override); + UNIMPLEMENTED(int fchown(uid_t owner, gid_t group) override); + UNIMPLEMENTED(int ftruncate(off_t length) override); + virtual IFileSystem *filesystem() { + return nullptr; + } + virtual int fstat(struct stat *buf) override { + return m_local_file->fstat(buf); + } + + ssize_t preadv(const struct iovec *iov, int iovcnt, off_t offset) override { + LOG_ERRNO_RETURN(EINVAL, -1, "not readable"); + } + + ssize_t write(const void *buf, size_t count) override { + if (m_failed) { + LOG_ERROR_RETURN(EINVAL, -1, "already failed"); + } + auto rc = m_local_file->write(buf, count); + if (rc < 0) { + LOG_ERRNO_RETURN(0, -1, "failed to write local file", VALUE(rc)); + } + if (rc > 0 && SHA256_Update(&m_sha256_ctx, buf, rc) != 1) { /* OpenSSL returns 1 on success and 0 on failure, never a negative value */ + LOG_ERRNO_RETURN(0, -1, "sha256 calculate error"); + } + m_write_pos += rc; + m_sem.signal(1); + return rc; + } + + ssize_t pwrite(const void *buf, size_t count, off_t offset) override { + LOG_ERROR_RETURN(EINVAL, -1, "pwrite is not supported"); + } + + std::pair load_auth(const char *remote_path) { + return std::make_pair(m_username, m_password); + } + + // non-empty digest means complete request + off_t upload_chunk(off_t offset, size_t count, std::string_view digest) { + LOG_INFO("upload chunk ", VALUE(offset), VALUE(count), VALUE(digest)); + Timeout tmo(m_timeout);
+ auto verb = Verb::PATCH; + estring url = m_upload_url; + if (!digest.empty()) { + verb = Verb::PUT; + estring delimiter = "?"; + if (m_upload_url.find("?") != std::string::npos) { + delimiter = "&"; + } + url = estring().appends(m_upload_url, delimiter, "digest=", digest); + } + int retry = 3; + LOG_INFO(VALUE(url)); + again: + if (photon::now - m_http_client_ts >= 5ULL * 60 * 1000 * 1000) { + LOG_INFO("http client expire, refresh"); + m_upload_fs->refresh_client(); + m_http_client_ts = photon::now; + } + HTTP_OP op(m_upload_fs->get_client(), verb, url); + op.follow = 0; + op.retry = 0; + op.req.headers.content_length(count); + + auto writer = [&](Request *req) -> ssize_t { + auto start = offset; + ssize_t ret = 0; + while (start < (off_t)(offset + count)) { + ssize_t cnt = 1024 * 1024; + if ((off_t)(start + cnt) > (off_t)(offset + count)) + cnt = offset + count - start; + auto rc = m_local_file->pread(m_upload_buf, cnt, start); + if (rc != cnt) { + LOG_ERRNO_RETURN(0, -1, "failed to read file", VALUE(rc), VALUE(cnt)); + } + rc = req->write(m_upload_buf, cnt); + if (rc != cnt) { + LOG_ERRNO_RETURN(0, -1, "failed to upload", VALUE(rc), VALUE(cnt)); + } + start += cnt; + ret += cnt; + } + return ret; + }; + + if (digest.empty()) { + op.req.headers.insert("Content-Type", "application/octet-stream"); + op.req.headers.insert_format("Content-Range", "%lu-%lu", offset, offset + count - 1); + op.body_writer = writer; + } + op.req.headers.insert(kAuthHeaderKey, "Bearer "); + op.req.headers.value_append(m_token); + op.timeout = tmo.timeout(); + op.call(); + + if (op.status_code == 401 || op.status_code == 403) { + LOG_WARN("Token invalid, try refresh"); + if (retry--) { + if (m_upload_fs->refresh_token(m_upload_url, m_token) < 0) { + LOG_ERRNO_RETURN(0, -1, "failed update token"); + } + goto again; + } + } + + if (op.status_code / 100 == 2) { + if (count > 0) { + auto rg = op.resp.headers.range(); + if (rg.second == -1) { + LOG_ERRNO_RETURN(0, -1, "failed to upload, 
range=(`-`)", rg.first, rg.second); + } + auto new_upload_pos = rg.second + 1; + m_upload_url = op.resp.headers["Location"]; + return new_upload_pos; + } else { + LOG_INFO(op.resp.headers["Docker-Content-Digest"]); + } + return 0; + } + LOG_ERRNO_RETURN(0, -1, "failed to upload, code=", op.status_code); + } + + int upload_thread() { + photon::init(photon::INIT_EVENT_DEFAULT, photon::INIT_IO_NONE); + DEFER(photon::fini()); + m_upload_fs = new RegistryFSImpl_v2({this, &RegistryUploader::load_auth}, "", m_timeout); + DEFER({ delete m_upload_fs; }); + m_http_client_ts = photon::now; + ::posix_memalign(&m_upload_buf, 4096, 1024 * 1024); + DEFER(free(m_upload_buf)); + int retry = 3; + again: + m_upload_pos = 0; + if (init_upload() < 0) { + if (retry--) { + goto again; + } + m_failed = true; + m_init_sem.signal(1); + LOG_ERRNO_RETURN(0, -1, "failed to init upload"); + } + + m_init_sem.signal(1); + while (!m_finished && !m_failed) { + m_sem.wait(1); + while (m_write_pos > m_upload_pos + m_upload_chunk_size) { + m_upload_pos = upload_chunk(m_upload_pos, m_upload_chunk_size, ""); + if (m_upload_pos < 0) { + if (retry--) { + LOG_ERROR("failed to upload chunk, retry"); + m_sem.signal(1); + goto again; + } + + m_failed = true; + goto fail; + } + } + } + while (m_write_pos > m_upload_pos && !m_failed) { + auto size = m_write_pos - m_upload_pos; + if (size > m_upload_chunk_size) + size = m_upload_chunk_size; + m_upload_pos = upload_chunk(m_upload_pos, size, ""); + if (m_upload_pos < 0) { + if (retry--) { + LOG_ERROR("failed to upload chunk, retry"); + goto again; + } + m_failed = true; + goto fail; + } + } + + // send complete + m_upload_pos = upload_chunk(m_upload_pos, 0, m_sha256sum); + if (m_upload_pos < 0) { + if (retry--) { + LOG_ERROR("failed to send complete request, retry"); + goto again; + } + m_failed = true; + LOG_ERROR("failed to send complete request"); + goto fail; + } + LOG_INFO("file uploaded"); + return 0; + + fail: + LOG_ERROR("file upload failed"); + return -1; 
+ } + + int init_upload() { + m_upload_url = m_origin_upload_url; + if (m_upload_fs->refresh_token(m_upload_url, m_token) < 0) { + return -1; + } + + Timeout tmo(m_timeout); + HTTP_OP op(m_upload_fs->get_client(), Verb::POST, m_upload_url); + op.req.headers.insert("Content-Type", "application/octet-stream"); + op.req.headers.insert(kAuthHeaderKey, "Bearer "); + op.req.headers.value_append(m_token); + op.follow = 0; + op.retry = 0; + op.timeout = tmo.timeout(); + op.call(); + if (op.status_code == 401 || op.status_code == 403) { + LOG_ERROR_RETURN(0, -1, "Token invalid"); + } + if (op.status_code / 100 == 2) { + auto location = op.resp.headers["Location"]; + m_upload_url = std::string(location); + LOG_INFO(VALUE(m_upload_url)); + return 0; + } + LOG_ERROR_RETURN(0, -1, "failed to get upload url, code=`", op.status_code); + } +}; + +IFile *new_registry_uploader(IFile *lfile, std::string &upload_url, std::string &username, + std::string &password, uint64_t timeout, ssize_t upload_bs) { + auto ret = new RegistryUploader(lfile, upload_url, username, password, timeout, upload_bs); + if (ret->init() < 0) { + delete ret; + return nullptr; + } + return ret; +} From 870faa1ebe683ad2212bd8231f7e7ce1d29ae200 Mon Sep 17 00:00:00 2001 From: Lanzheng Liu Date: Tue, 17 Oct 2023 17:38:17 +0800 Subject: [PATCH 17/31] multi-processor zfile builder Signed-off-by: Lanzheng Liu --- src/overlaybd/zfile/compressor.h | 6 +- src/overlaybd/zfile/test/test.cpp | 72 ++++++++ src/overlaybd/zfile/zfile.cpp | 287 +++++++++++++++++++++++++++--- src/tools/overlaybd-commit.cpp | 3 + 4 files changed, 343 insertions(+), 25 deletions(-) diff --git a/src/overlaybd/zfile/compressor.h b/src/overlaybd/zfile/compressor.h index 76c4fb5a..a56605b6 100644 --- a/src/overlaybd/zfile/compressor.h +++ b/src/overlaybd/zfile/compressor.h @@ -60,10 +60,12 @@ class CompressArgs { photon::fs::IFile *fdict = nullptr; std::unique_ptr dict_buf = nullptr; CompressOptions opt; + bool overwrite_header; + int workers; 
CompressArgs(const CompressOptions &opt, photon::fs::IFile *dict = nullptr, - unsigned char *dict_buf = nullptr) - : fdict(dict), dict_buf(dict_buf), opt(opt) { + unsigned char *dict_buf = nullptr, bool overwrite_header = false, int workers = 1) + : fdict(dict), dict_buf(dict_buf), opt(opt), overwrite_header(overwrite_header), workers(workers) { if (fdict || dict_buf) { this->opt.use_dict = 1; } diff --git a/src/overlaybd/zfile/test/test.cpp b/src/overlaybd/zfile/test/test.cpp index b685e415..4ee05a22 100644 --- a/src/overlaybd/zfile/test/test.cpp +++ b/src/overlaybd/zfile/test/test.cpp @@ -277,6 +277,78 @@ TEST_F(ZFileTest, dsa) { ASSERT_EQ(ret, 0); } +TEST_F(ZFileTest, verify_builder) { + auto fn_src = "verify.data"; + auto fn_zfile = "verify.zfile"; + auto fn_zfile_1 = "verify.zfile.1"; + auto src = lfs->open(fn_src, O_CREAT | O_TRUNC | O_RDWR, 0644); + if (src == nullptr) { + LOG_ERROR("failed to open file: `(`)", errno, strerror(errno)); + return; + } + randwrite(src, write_times); + struct stat _st; + if (src->fstat(&_st) != 0) { + LOG_ERROR("failed randwrite src file: `(`)", errno, strerror(errno)); + return; + } + + // zfile builder multi-processor + auto dst = lfs->open(fn_zfile, O_CREAT | O_TRUNC | O_RDWR, 0644); + if (!dst) { + LOG_ERROR("failed to open file: `(`)", errno, strerror(errno)); + } + DEFER({delete dst;}); + ZFile::CompressOptions opt; + opt.verify = 1; + opt.block_size = 4096; + ZFile::CompressArgs zfile_args(opt); + zfile_args.workers = 4; + auto zfile_builder = ZFile::new_zfile_builder(dst, &zfile_args, false); + src->lseek(0, 0); + char buf[16*1024]; + while (true) { + auto sz = rand() % 8192 + 1; + auto rc = src->read(buf, sz); + if (rc <= 0) break; + zfile_builder->write(buf, rc); + } + zfile_builder->close(); + + // zfile builder + ZFile::CompressOptions opt_1; + opt_1.verify = 1; + opt_1.block_size = 4096; + ZFile::CompressArgs zfile_args_1(opt_1); + zfile_args_1.workers = 1; + auto dst_1 = lfs->open(fn_zfile_1, O_CREAT | O_TRUNC | 
O_RDWR, 0644); + if (!dst_1) { + LOG_ERROR("failed to open file: `(`)", errno, strerror(errno)); + } + DEFER({delete dst_1;}); + auto zfile_builder_1 = ZFile::new_zfile_builder(dst_1, &zfile_args_1, false); + src->lseek(0, 0); + while (true) { + auto sz = rand() % 8192 + 1; + auto rc = src->read(buf, sz); + if (rc <= 0) break; + zfile_builder_1->write(buf, rc); + } + zfile_builder_1->close(); + + EXPECT_EQ(dst->lseek(0, SEEK_CUR), dst_1->lseek(0, SEEK_CUR)); + dst->lseek(0, 0); + dst_1->lseek(0, 0); + char buf_1[16*1024]; + while (true) { + auto rc = dst->read(buf, 8192); + auto rc_1 = dst_1->read(buf_1, 8192); + EXPECT_EQ(rc, rc_1); + EXPECT_EQ(memcmp(buf, buf_1, rc), 0); + if (rc == 0) break; + } +} + int main(int argc, char **argv) { auto seed = 154702356; cerr << "seed = " << seed << endl; diff --git a/src/overlaybd/zfile/zfile.cpp b/src/overlaybd/zfile/zfile.cpp index b2ab970e..43f99946 100644 --- a/src/overlaybd/zfile/zfile.cpp +++ b/src/overlaybd/zfile/zfile.cpp @@ -15,8 +15,6 @@ */ #include "zfile.h" -#include -#include #include #include #include @@ -29,9 +27,12 @@ #include #include #include +#include +#include #include "crc32/crc32c.h" #include "compressor.h" -#include "photon/common/alog.h" +#include +#include using namespace photon::fs; @@ -559,17 +560,23 @@ ssize_t compress_data(ICompressor *compressor, const unsigned char *buf, size_t return compressed_len; } -class ZFileBuilder : public VirtualReadOnlyFile { +class ZFileBuilderBase : public VirtualReadOnlyFile { public: - ZFileBuilder(IFile *file, const CompressArgs *args, bool ownership) - : m_dest(file), m_opt(args->opt), m_ownership(ownership) { + virtual int init() = 0; + virtual int fini() = 0; +}; +class ZFileBuilder : public ZFileBuilderBase { +public: + ZFileBuilder(IFile *file, const CompressArgs *args, bool ownership) + : m_dest(file), m_args(args), m_ownership(ownership) { + m_opt = m_args->opt; LOG_INFO("create stream compressing object. 
[ block size: `, type: `, enable_checksum: `]", m_opt.block_size, m_opt.algo, m_opt.verify); } - int init(const CompressArgs *args) { - m_compressor = create_compressor(args); + int init() { + m_compressor = create_compressor(m_args); if (m_compressor == nullptr) { LOG_ERRNO_RETURN(0, -1, "create compressor failed."); } @@ -626,16 +633,19 @@ class ZFileBuilder : public VirtualReadOnlyFile { auto ret = write_header_trailer(m_dest, false, true, true, pht); if (ret < 0) LOG_ERRNO_RETURN(0, -1, "failed to write trailer"); - LOG_INFO("overwrite file header."); - ret = write_header_trailer(m_dest, true, false, true, pht, 0); - if (ret < 0) { - LOG_ERRNO_RETURN(0, -1, "failed to overwrite header"); + if (m_args->overwrite_header) { + LOG_INFO("overwrite file header."); + ret = write_header_trailer(m_dest, true, false, true, pht, 0); + if (ret < 0) { + LOG_ERRNO_RETURN(0, -1, "failed to overwrite header"); + } } return 0; } virtual int close() override { - auto ret = fini(); + if (fini() < 0) + return -1; delete m_compressor; delete[] compressed_data; if (m_ownership) { @@ -683,6 +693,7 @@ class ZFileBuilder : public VirtualReadOnlyFile { off_t moffset = 0; size_t raw_data_size = 0; size_t m_buf_size = 0; + const CompressArgs *m_args; CompressOptions m_opt; ICompressor *m_compressor = nullptr; bool m_ownership = false; @@ -696,7 +707,231 @@ class ZFileBuilder : public VirtualReadOnlyFile { UNIMPLEMENTED(int fstat(struct stat *buf) override); }; -// static std::unique_ptr +// multi-processor supported zfile builder +class ZFileBuilderMP : public ZFileBuilderBase { +public: + ZFileBuilderMP(IFile *file, const CompressArgs *args, bool ownership) + : m_dest(file), m_args(args), m_ownership(ownership) { + m_workers = args->workers; + m_opt = m_args->opt; + LOG_INFO("create multi-processor stream compressing object. 
[ block size: `, alog: `, enable_checksum: `, workers: `]", + m_opt.block_size, m_opt.algo, m_opt.verify, m_workers); + } + + class WorkerCtx { + public: + int id; + bool writable = false; + unsigned char* ibuf = nullptr; + unsigned char* obuf = nullptr; + size_t size = 0; /* zero-initialized: a worker that never receives a block must see "no pending data" when the stop flag is set */ + size_t buf_size; + photon::semaphore writable_sem; + photon::semaphore compress_sem; + photon::semaphore write_sem; + int result = 0; + + WorkerCtx(int id, size_t buf_size) + : id(id), buf_size(buf_size), writable_sem(1), compress_sem(0), write_sem(0) { + ibuf = new unsigned char[buf_size]; + obuf = new unsigned char[buf_size]; + } + + ~WorkerCtx() { + delete[] ibuf; /* buffers are allocated with new[], so they must be released with delete[] */ + delete[] obuf; + } + void start_compress(int isize) { + writable = false; + size = isize; + compress_sem.signal(1); + } + }; + + int init() { + auto pht = new (m_ht)(CompressionFile::HeaderTrailer); + pht->set_compress_option(m_opt); + LOG_INFO("write header."); + auto ret = write_header_trailer(m_dest, true, false, true, pht); + if (ret < 0) { + LOG_ERRNO_RETURN(0, -1, "failed to write header"); + } + moffset = + CompressionFile::HeaderTrailer::SPACE + 0; // opt.dict_size; + // currently dictionary is not supported. 
+ m_buf_size = m_opt.block_size + BUF_SIZE; + cur_id = 0; + for (int i = 0; i < m_workers; i++) + workers.emplace_back(new WorkerCtx(i, m_buf_size)); + + for (int i = 0; i < m_workers; i++) { + ths.emplace_back([&, id=i] { + photon::init(photon::INIT_EVENT_EPOLL, photon::INIT_IO_NONE); + DEFER(photon::fini()); + + auto ctx = workers[id]; + auto next_ctx = workers[(id+1)%m_workers]; + auto compressor = create_compressor(m_args); + if (compressor == nullptr) { + ctx->result = -1; + LOG_ERRNO_RETURN(0, -1, "failed to create compressor"); + } + DEFER(delete compressor); + + while (true) { + ctx->compress_sem.wait(1); + if (m_stop_flag && ctx->size == 0) { + break; + } + auto compressed_size = + compress_data(compressor, ctx->ibuf, ctx->size, ctx->obuf, ctx->buf_size, m_opt.verify); + if (compressed_size < 0) { + ctx->result = -1; + LOG_ERRNO_RETURN(EIO, -1, "failed to compress"); + } + + ctx->size = 0; + // ibuf is ready to write + ctx->writable_sem.signal(1); + + ctx->write_sem.wait(1); + moffset += compressed_size; + m_block_len.push_back(compressed_size); + if (m_dest->write(ctx->obuf, compressed_size) != compressed_size) { + ctx->result = -1; + LOG_ERRNO_RETURN(0, -1, "failed to write compressed data"); + } + next_ctx->write_sem.signal(1); + } + return 0; + }); + } + + workers[0]->write_sem.signal(1); + return 0; + } + + int fini() { + if (reserved_size) { + workers[cur_id]->start_compress(reserved_size); + } + + // wait for workers + m_stop_flag = true; + for (int i = 0; i < m_workers; i++) + workers[i]->compress_sem.signal(1); + for (auto &th : ths) { + th.join(); + } + for (int i = 0; i < m_workers; i++) { + if (workers[i]->result < 0) { + LOG_ERROR_RETURN(0, -1, "failed to compress data"); + } + } + + // compress done + uint64_t index_offset = moffset; + uint64_t index_size = m_block_len.size(); + ssize_t index_bytes = index_size * sizeof(uint32_t); + LOG_INFO("write index (offset: `, count: ` size: `)", index_offset, index_size, index_bytes); + if 
(m_dest->write(&m_block_len[0], index_bytes) != index_bytes) { + LOG_ERRNO_RETURN(0, -1, "failed to write index."); + } + auto pht = (CompressionFile::HeaderTrailer *)m_ht; + pht->index_crc = crc32::crc32c(&m_block_len[0], index_bytes); + LOG_INFO("index crc: ", pht->index_crc); + pht->index_offset = index_offset; + pht->index_size = index_size; + pht->original_file_size = raw_data_size; + LOG_INFO("write trailer."); + auto ret = write_header_trailer(m_dest, false, true, true, pht); + if (ret < 0) + LOG_ERRNO_RETURN(0, -1, "failed to write trailer"); + if (m_args->overwrite_header) { + LOG_INFO("overwrite file header."); + ret = write_header_trailer(m_dest, true, false, true, pht, 0); + if (ret < 0) { + LOG_ERRNO_RETURN(0, -1, "failed to overwrite header"); + } + } + return 0; + } + + virtual int close() override { + if (fini() < 0) { + return -1; + } + if (m_ownership) { + m_dest->close(); + } + return 0; + } + + inline void copy(WorkerCtx *ctx, const void *from, size_t count, off_t offset) { + if (!ctx->writable) { + ctx->writable_sem.wait(1); + ctx->writable = true; + } + memcpy(ctx->ibuf + offset, from, count); + } + + virtual ssize_t write(const void *buf, size_t count) override { + raw_data_size += count; + auto expected_ret = count; + auto ctx = workers[cur_id]; + + if (reserved_size != 0) { + if (reserved_size + count < m_opt.block_size) { + copy(ctx, buf, count, reserved_size); + reserved_size += count; + return expected_ret; // save uncompressed buffer and return write count; + } + auto delta = m_opt.block_size - reserved_size; + copy(ctx, buf, delta, reserved_size); + buf = (const void *)((const char*)buf + delta); + count -= delta; + + ctx->start_compress(reserved_size + delta); + cur_id = (cur_id+1)%m_workers; + ctx = workers[cur_id]; + reserved_size = 0; + } + + for (off_t i = 0; i < (ssize_t)count; i += m_opt.block_size) { + if (i + m_opt.block_size > (ssize_t)count) { + copy(ctx, buf+i, count-i, 0); + reserved_size = count - i; + break; + } + 
copy(ctx, buf+i, m_opt.block_size, 0); + ctx->start_compress(m_opt.block_size); + cur_id = (cur_id+1)%m_workers; + ctx = workers[cur_id]; + } + LOG_DEBUG("compressed ` bytes done. reserved: `", expected_ret, reserved_size); + return expected_ret; + } + + std::vector workers; + bool m_stop_flag = false; + int m_workers; + IFile *m_dest; + off_t moffset = 0; + size_t raw_data_size = 0; + size_t m_buf_size = 0; + const CompressArgs *m_args; + CompressOptions m_opt; + bool m_ownership = false; + std::vector m_block_len; + std::vector ths; + size_t reserved_size = 0; + char m_ht[CompressionFile::HeaderTrailer::SPACE]{}; + int cur_id; + + UNIMPLEMENTED_POINTER(IFileSystem *filesystem() override); + UNIMPLEMENTED(int fstat(struct stat *buf) override); +}; + bool load_jump_table(IFile *file, CompressionFile::HeaderTrailer *pheader_trailer, CompressionFile::JumpTable &jump_table, bool trailer = true) { char buf[CompressionFile::HeaderTrailer::SPACE]; @@ -844,7 +1079,6 @@ static int write_header_trailer(IFile *file, bool is_header, bool is_sealed, boo } int zfile_compress(IFile *file, IFile *as, const CompressArgs *args) { - if (args == nullptr) { LOG_ERROR_RETURN(EINVAL, -1, "CompressArgs is null"); } @@ -941,10 +1175,12 @@ int zfile_compress(IFile *file, IFile *as, const CompressArgs *args) { ret = write_header_trailer(as, false, true, true, pht); if (ret < 0) LOG_ERRNO_RETURN(0, -1, "failed to write trailer"); - LOG_INFO("overwrite file header."); - ret = write_header_trailer(as, true, false, true, pht, 0); - if (ret < 0) { - LOG_ERRNO_RETURN(0, -1, "failed to overwrite header"); + if (args->overwrite_header) { + LOG_INFO("overwrite file header."); + ret = write_header_trailer(as, true, false, true, pht, 0); + if (ret < 0) { + LOG_ERRNO_RETURN(0, -1, "failed to overwrite header"); + } } return 0; } @@ -1021,11 +1257,16 @@ int is_zfile(IFile *file) { } IFile *new_zfile_builder(IFile *file, const CompressArgs *args, bool ownership) { - auto r = new ZFileBuilder(file, args, 
ownership); - if (r->init(args) != 0) { - delete r; + ZFileBuilderBase *builder; + if (args->workers == 1) { + builder = new ZFileBuilder(file, args, ownership); + } else { + builder = new ZFileBuilderMP(file, args, ownership); + } + if (builder->init() != 0) { + delete builder; LOG_ERRNO_RETURN(0, nullptr, "init zfileStreamWriter failed."); } - return r; + return builder; } } // namespace ZFile diff --git a/src/tools/overlaybd-commit.cpp b/src/tools/overlaybd-commit.cpp index ae26c7ab..e3353113 100644 --- a/src/tools/overlaybd-commit.cpp +++ b/src/tools/overlaybd-commit.cpp @@ -58,6 +58,7 @@ int main(int argc, char **argv) { bool build_fastoci = false; bool tar = false, rm_old = false, seal = false, commit_sealed = false; bool verbose = false; + int compress_threads = 1; CLI::App app{"this is overlaybd-commit"}; app.add_option("-m", commit_msg, "add some custom message if needed"); @@ -77,6 +78,7 @@ int main(int argc, char **argv) { app.add_option("commit_file", commit_file_path, "commit file path")->type_name("FILEPATH"); app.add_flag("--seal", seal, "seal only, data_file is output itself")->default_val(false); app.add_flag("--commit_sealed", commit_sealed, "commit sealed, index_file is output")->default_val(false); + app.add_option("--compress_threads", compress_threads, "compress threads")->default_val(1); app.add_flag("--verbose", verbose, "output debug info")->default_val(false); CLI11_PARSE(app, argc, argv); build_turboOCI = build_turboOCI || build_fastoci; @@ -145,6 +147,7 @@ int main(int argc, char **argv) { fout = open_file(fs, commit_file_path.c_str(), O_RDWR | O_EXCL | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); ZFile::CompressArgs zfile_args(opt); + zfile_args.workers = compress_threads; zfile_builder = ZFile::new_zfile_builder(fout, &zfile_args, false); out = zfile_builder; } else { From 9046a369e3733cbc1236ff849d457fd1e06ef42f Mon Sep 17 00:00:00 2001 From: Coldwings Date: Fri, 3 Nov 2023 15:24:07 +0800 Subject: [PATCH 18/31] fix the metrics 
exporter Signed-off-by: Coldwings --- src/exporter_handler.h | 80 +++++++++------ src/exporter_server.h | 100 +++++++++---------- src/image_service.cpp | 6 +- src/metrics.h | 222 ----------------------------------------- src/metrics_fs.h | 73 ++++++++------ src/textexporter.h | 16 +++ 6 files changed, 163 insertions(+), 334 deletions(-) delete mode 100644 src/metrics.h diff --git a/src/exporter_handler.h b/src/exporter_handler.h index ddc84333..18786167 100644 --- a/src/exporter_handler.h +++ b/src/exporter_handler.h @@ -1,12 +1,27 @@ +/* + Copyright The Overlaybd Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + #pragma once -#include "metrics.h" #include +#include +#include #include -#include -#include "textexporter.h" -#include +#include "textexporter.h" namespace ExposeMetrics { @@ -23,51 +38,58 @@ namespace ExposeMetrics { ret.append(name.help_str()).append("\n"); \ ret.append(name.type_str()).append("\n"); \ for (auto x : va_##name) { \ - ret.append( \ - name.render(x.second->val(), nodename.c_str(), x.first)) \ - .append("\n"); \ + ret.append(name.render(x.second->val(), x.first)).append("\n"); \ } \ ret.append("\n"); \ } - struct ExposeRender : public photon::net::http::HTTPHandler { - EXPOSE_PHOTON_METRICLIST(latency, Metric::AverageLatencyCounter); - - std::string nodename; - photon::net::http::DelegateHTTPHandler handler; + EXPOSE_PHOTON_METRICLIST(throughput, Metric::QPSCounter); + EXPOSE_PHOTON_METRICLIST(qps, Metric::QPSCounter); + EXPOSE_PHOTON_METRICLIST(latency, Metric::MaxLatencyCounter); + EXPOSE_PHOTON_METRICLIST(count, Metric::AddCounter); + EXPOSE_PHOTON_METRICLIST(cache, Metric::ValueCounter); template - ExposeRender(Args&&... args) - : nodename(std::forward(args)...), - handler{this, &ExposeRender::handle_request} {} + ExposeRender(Args&&... 
args) {} std::string render() { - EXPOSE_TEMPLATE(alive, is_alive : gauge{node}); - EXPOSE_TEMPLATE(latency, blob_read_average_latency : gauge{node, type} #us); + EXPOSE_TEMPLATE(alive, OverlayBD_Alive : gauge{node}); + EXPOSE_TEMPLATE(throughput, OverlayBD_Read_Throughtput + : gauge{node, type, mode} #Bytes / sec); + EXPOSE_TEMPLATE(qps, OverlayBD_QPS : gauge{node, type, mode}); + EXPOSE_TEMPLATE(latency, OverlayBD_MaxLatency + : gauge{node, type, mode} #us); + EXPOSE_TEMPLATE(count, OverlayBD_Count : gauge{node, type} #Bytes); std::string ret(alive.help_str()); ret.append("\n") .append(alive.type_str()) .append("\n") - .append(alive.render(1, nodename.c_str())) + .append(alive.render(1)) .append("\n\n"); + LOOP_APPEND_METRIC(ret, throughput); + LOOP_APPEND_METRIC(ret, qps); LOOP_APPEND_METRIC(ret, latency); + LOOP_APPEND_METRIC(ret, count); return ret; } - int handle_request(photon::net::http::Request& req, photon::net::http::Response& resp, std::string_view) override { - std::string str = render(); - auto cl = str.size(); - if (cl > 4096) { - LOG_ERROR_RETURN(0, -1, "RetType failed test"); - } + int handle_request(photon::net::http::Request& req, + photon::net::http::Response& resp, + std::string_view) override { + auto body = render(); resp.set_result(200); - resp.headers.content_length(cl); - resp.write((void*)str.data(), str.size()); - return 0; + resp.keep_alive(true); + resp.headers.insert("Content-Type", "text/plain; version=0.0.4"); + resp.headers.content_length(body.length()); + ssize_t len = 0; + len = resp.write((void*)body.data(), body.length()); + if (len == (ssize_t)body.length()) { + return 0; + } else { + LOG_ERRNO_RETURN(0, -1, "Failed to write exporter response"); + } } - - photon::net::http::DelegateHTTPHandler get_handler() { return handler; } }; #undef LOOP_APPEND_METRIC diff --git a/src/exporter_server.h b/src/exporter_server.h index e485ad6c..80d0b6e1 100644 --- a/src/exporter_server.h +++ b/src/exporter_server.h @@ -1,81 +1,75 @@ +/* + 
Copyright The Overlaybd Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + #pragma once -#include "config.h" -#include "exporter_handler.h" -#include "metrics_fs.h" +#include #include #include -#include #include #include -#include -#include -#include - -#include - +#include "config.h" +#include "exporter_handler.h" +#include "metrics_fs.h" class OverlayBDMetric { public: - MetricMeta download; - ExposeMetrics::ExposeRender exporter; + MetricMeta pread, download; - photon::Timer timer; - uint64_t m_timeout = -1; + ExposeMetrics::ExposeRender exporter; - OverlayBDMetric() : timer(-1, {this, &OverlayBDMetric::on_timer}) { + OverlayBDMetric() { + exporter.add_throughput("pread", pread.throughput); + exporter.add_latency("pread", pread.latency); + exporter.add_qps("pread", pread.qps); + exporter.add_count("pread", pread.total); + exporter.add_throughput("download", download.throughput); exporter.add_latency("download", download.latency); + exporter.add_qps("download", download.qps); + exporter.add_count("download", download.total); } - - ~OverlayBDMetric() { - timer.cancel(); - timer.stop(); - } - - uint64_t on_timer() { - return m_timeout; - } - - uint64_t interval(uint64_t x) { return m_timeout = x; } - uint64_t interval() { return m_timeout; } - int start() { return timer.reset(0); } }; struct ExporterServer { - photon::WorkPool wp; - photon::net::http::HTTPServer *httpserver = nullptr; photon::net::ISocketServer *tcpserver = nullptr; bool ready = false; 
ExporterServer(ImageConfigNS::GlobalConfig &config, - OverlayBDMetric *metrics) - : wp(1, photon::INIT_EVENT_EPOLL, 0, 0) { - wp.call([&] { - prctl(PR_SET_THP_DISABLE, 1); - pthread_setname_np(pthread_self(), "overlaybd-exporter"); - char buffer[64]; - snprintf(buffer, 63, "localhost:%d", config.exporterConfig().port()); - metrics->exporter.nodename = buffer; - - tcpserver = photon::net::new_tcp_socket_server(); - tcpserver->bind(config.exporterConfig().port(), photon::net::IPAddr("127.0.0.1")); - - tcpserver->listen(); - httpserver = photon::net::http::new_http_server(); - httpserver->add_handler(&metrics->exporter); - tcpserver->set_handler(httpserver->get_connection_handler()); - tcpserver->start_loop(); - ready = true; - }); + OverlayBDMetric *metrics) { + tcpserver = photon::net::new_tcp_socket_server(); + tcpserver->setsockopt(SOL_SOCKET, SO_REUSEPORT, 1); + if (tcpserver->bind(config.exporterConfig().port()) < 0) + LOG_ERRNO_RETURN(0, , "Failed to bind exporter port `", + config.exporterConfig().port()); + if (tcpserver->listen() < 0) + LOG_ERRNO_RETURN(0, , "Failed to listen exporter port `", + config.exporterConfig().port()); + httpserver = photon::net::http::new_http_server(); + httpserver->add_handler(&metrics->exporter, false, + config.exporterConfig().uriPrefix()); + tcpserver->set_handler(httpserver->get_connection_handler()); + tcpserver->start_loop(); + ready = true; } ~ExporterServer() { - wp.call([&] { - delete tcpserver; - delete httpserver; - }); + delete tcpserver; + delete httpserver; } }; \ No newline at end of file diff --git a/src/image_service.cpp b/src/image_service.cpp index 87048dc9..0075b137 100644 --- a/src/image_service.cpp +++ b/src/image_service.cpp @@ -346,8 +346,6 @@ int ImageService::init() { } if (global_conf.exporterConfig().enable()) { metrics.reset(new OverlayBDMetric()); - metrics->interval(global_conf.exporterConfig().updateInterval()); - metrics->start(); global_fs.srcfs = new MetricFS(global_fs.underlay_registryfs, 
&metrics->download); exporter = new ExporterServer(global_conf, metrics.get()); if (!exporter->ready) @@ -410,6 +408,10 @@ int ImageService::init() { LOG_ERRNO_RETURN(0, -1, "failed to create cached_fs"); } + if (global_conf.exporterConfig().enable()) { + global_fs.cached_fs = new MetricFS(global_fs.cached_fs, &metrics->pread); + } + if (global_conf.gzipCacheConfig().enable()) { LOG_INFO("use gzip file cache"); cache_dir = global_conf.gzipCacheConfig().cacheDir(); diff --git a/src/metrics.h b/src/metrics.h deleted file mode 100644 index e6f0e664..00000000 --- a/src/metrics.h +++ /dev/null @@ -1,222 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace Metric { - -class ValueCounter { -public: - int64_t counter = 0; - - void set(int64_t x) { counter = x; } - int64_t val() { return counter; } -}; - -class AddCounter { -public: - int64_t counter = 0; - - void inc() { counter++; } - void dec() { counter--; } - void add(int64_t x) { counter += x; } - void sub(int64_t x) { counter -= x; } - void reset() { counter = 0; } - int64_t val() { return counter; } -}; - -class AverageCounter { -public: - int64_t sum = 0; - int64_t cnt = 0; - uint64_t time = 0; - uint64_t m_interval = 60UL * 1000 * 1000; - - void normalize() { - auto now = photon::now; - if (now - time > m_interval * 2) { - reset(); - } else if (now - time > m_interval) { - sum = photon::sat_sub(sum, sum * (now - time) / m_interval); - cnt = photon::sat_sub(cnt, cnt * (now - time) / m_interval); - time = now; - } - } - void put(int64_t val, int64_t add_cnt = 1) { - normalize(); - sum += val; - cnt += add_cnt; - } - void reset() { - sum = 0; - cnt = 0; - time = photon::now; - } - int64_t interval() { return m_interval; } - int64_t interval(int64_t x) { return m_interval = x; } - int64_t val() { - normalize(); - return cnt ? 
sum / cnt : 0; - } -}; - -class QPSCounter { -public: - int64_t counter = 0; - uint64_t time = photon::now; - uint64_t m_interval = 1UL * 1000 * 1000; - static constexpr uint64_t SEC = 1UL * 1000 * 1000; - - void normalize() { - auto now = photon::now; - if (now - time >= m_interval * 2) { - reset(); - } else if (now - time > m_interval) { - counter = - photon::sat_sub(counter, counter * (now - time) / m_interval); - time = now; - } - } - void put(int64_t val = 1) { - normalize(); - counter += val; - } - void reset() { - counter = 0; - time = photon::now; - } - uint64_t interval() { return m_interval; } - uint64_t interval(uint64_t x) { return m_interval = x; } - int64_t val() { - normalize(); - return counter; - } -}; - -class MaxCounter { -public: - int64_t maxv = 0; - - void put(int64_t val) { - if (val > maxv) { - maxv = val; - } - } - void reset() { maxv = 0; } - int64_t val() { return maxv; } -}; - -class IntervalMaxCounter { -public: - int64_t maxv = 0, last_max = 0; - uint64_t time = 0; - uint64_t m_interval = 5UL * 1000 * 1000; - - void normalize() { - if (photon::now - time >= 2 * m_interval) { - // no `val` or `put` call in 2 intervals - // last interval max must become 0 - reset(); - } else if (photon::now - time > m_interval) { - // one interval passed - // current maxv become certainly max val in last interval - last_max = maxv; - maxv = 0; - time = photon::now; - } - } - - void put(int64_t val) { - normalize(); - maxv = val > maxv ? val : maxv; - } - - void reset() { - maxv = 0; - last_max = 0; - time = photon::now; - } - - uint64_t interval() { return m_interval; } - - uint64_t interval(uint64_t x) { return m_interval = x; } - - int64_t val() { - normalize(); - return maxv > last_max ? 
maxv : last_max; - } -}; - -template -class LatencyMetric { -public: - LatencyCounter& counter; - uint64_t start; - - explicit LatencyMetric(LatencyCounter& counter) - : counter(counter), start(photon::now) {} - - // no copy or move; - LatencyMetric(LatencyMetric&&) = delete; - LatencyMetric(const LatencyMetric&) = delete; - LatencyMetric& operator=(LatencyMetric&&) = delete; - LatencyMetric& operator=(const LatencyMetric&) = delete; - - ~LatencyMetric() { counter.put(photon::now - start); } -}; - -class AverageLatencyCounter : public AverageCounter { -public: - using MetricType = LatencyMetric; -}; - -class MaxLatencyCounter : public IntervalMaxCounter { -public: - using MetricType = LatencyMetric; -}; - -#define SCOPE_LATENCY(x) \ - std::decay::type::MetricType _CONCAT(__audit_start_time__, \ - __LINE__)(x); - -static ALogLogger default_metrics_logger; - -#define LOG_METRICS(...) (__LOG_METRICS__(ALOG_AUDIT, __VA_ARGS__)) - -#define __LOG_METRICS__(level, first, ...) \ - ({ \ - DEFINE_PROLOGUE(level, prolog); \ - auto __build_lambda__ = [&](ILogOutput* __output_##__LINE__) { \ - if (_IS_LITERAL_STRING(first)) { \ - __log__(level, __output_##__LINE__, prolog, \ - TSTRING(#first).template strip<'\"'>(), \ - ##__VA_ARGS__); \ - } else { \ - __log__(level, __output_##__LINE__, prolog, \ - ConstString::TString<>(), first, ##__VA_ARGS__); \ - } \ - }; \ - LogBuilder( \ - level, std::move(__build_lambda__), \ - &Metric::default_metrics_logger); \ - }) - -#define LOOP_APPEND_METRIC(ret, name) \ - if (!va_##name.empty()) { \ - ret.append(name.help_str()).append("\n"); \ - ret.append(name.type_str()).append("\n"); \ - for (auto x : va_##name) { \ - ret.append( \ - name.render(x.second->val(), nodename.c_str(), x.first)) \ - .append("\n"); \ - } \ - ret.append("\n"); \ - } - -} // namespace Metric diff --git a/src/metrics_fs.h b/src/metrics_fs.h index 194beae0..51f0fc0b 100644 --- a/src/metrics_fs.h +++ b/src/metrics_fs.h @@ -1,55 +1,72 @@ +/* + Copyright The Overlaybd 
Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + #pragma once -#include "metrics.h" -#include +#include #include -#include +struct MetricMeta { + Metric::MaxLatencyCounter latency; + Metric::QPSCounter throughput; + Metric::QPSCounter qps; + Metric::AddCounter total; + Metric::AddCounter interval; -class MetricMeta { -public: - // TODO: add more metrics (error count, virtual block device IO latency, etc...) - Metric::AverageLatencyCounter latency; + MetricMeta() {} }; class MetricFile : public photon::fs::ForwardFile_Ownership { public: MetricMeta *metrics; - MetricFile(IFile *file, MetricMeta *metricMeta) + MetricFile(photon::fs::IFile *file, MetricMeta *metricMeta) : photon::fs::ForwardFile_Ownership(file, true), metrics(metricMeta) {} - virtual ssize_t pread(void *buf, size_t cnt, - off_t offset) override { - auto start = photon::now; - auto ret = m_file->pread(buf, cnt, offset); - if (ret) { - auto duration = photon::now - start; - // latency of read 1MB - metrics->latency.put(duration<<20, ret); + __attribute__((always_inline)) void mark_metrics(ssize_t ret) { + if (ret > 0) { + metrics->throughput.put(ret); + metrics->total.add(ret); + metrics->interval.add(ret); } + } + + virtual ssize_t pread(void *buf, size_t cnt, off_t offset) override { + metrics->qps.put(); + SCOPE_LATENCY(metrics->latency); + auto ret = m_file->pread(buf, cnt, offset); + mark_metrics(ret); return ret; } virtual ssize_t preadv(const struct iovec *iovec, int iovcnt, off_t offset) override { 
- auto start = photon::now; + metrics->qps.put(); + SCOPE_LATENCY(metrics->latency); auto ret = m_file->preadv(iovec, iovcnt, offset); - if (ret) { - auto duration = photon::now - start; - metrics->latency.put(duration<<20, ret); - } + mark_metrics(ret); return ret; } virtual ssize_t preadv2(const struct iovec *iovec, int iovcnt, off_t offset, int flags) override { - auto start = photon::now; + metrics->qps.put(); + SCOPE_LATENCY(metrics->latency); auto ret = m_file->preadv2(iovec, iovcnt, offset, flags); - if (ret) { - auto duration = photon::now - start; - metrics->latency.put(duration<<20, ret); - } + mark_metrics(ret); return ret; } }; @@ -57,7 +74,8 @@ class MetricFile : public photon::fs::ForwardFile_Ownership { class MetricFS : public photon::fs::ForwardFS_Ownership { public: MetricMeta *metrics; - MetricFS(IFileSystem *fs, MetricMeta *metricMeta) + + MetricFS(photon::fs::IFileSystem *fs, MetricMeta *metricMeta) : photon::fs::ForwardFS_Ownership(fs, true), metrics(metricMeta) {} virtual photon::fs::IFile *open(const char *fn, int flags) override { @@ -72,5 +90,4 @@ class MetricFS : public photon::fs::ForwardFS_Ownership { if (!file) return nullptr; return new MetricFile(file, metrics); } - }; diff --git a/src/textexporter.h b/src/textexporter.h index 0299ca76..0becf225 100644 --- a/src/textexporter.h +++ b/src/textexporter.h @@ -1,3 +1,19 @@ +/* + Copyright The Overlaybd Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + #pragma once #include #include From 937ef47b9dd683b531755cfc7fee4f6586485948 Mon Sep 17 00:00:00 2001 From: Lanzheng Liu Date: Tue, 7 Nov 2023 11:40:26 +0800 Subject: [PATCH 19/31] fix mkfs by marking inode 1 Signed-off-by: Lanzheng Liu --- src/overlaybd/extfs/mkfs.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/overlaybd/extfs/mkfs.cpp b/src/overlaybd/extfs/mkfs.cpp index ceb60669..2ab6ee84 100644 --- a/src/overlaybd/extfs/mkfs.cpp +++ b/src/overlaybd/extfs/mkfs.cpp @@ -115,6 +115,7 @@ int do_mkfs(io_manager manager, size_t size) { return ret; } // reserve inodes + ext2fs_inode_alloc_stats2(fs, EXT2_BAD_INO, +1, 0); for (ext2_ino_t i = EXT2_ROOT_INO + 1; i < EXT2_FIRST_INODE(fs->super); i++) ext2fs_inode_alloc_stats2(fs, i, +1, 0); ext2fs_mark_ib_dirty(fs); From 9a49a3cddb3e706a568d328f30bad805b1617fbe Mon Sep 17 00:00:00 2001 From: "yuchen.cc" Date: Tue, 7 Nov 2023 14:36:21 +0800 Subject: [PATCH 20/31] support create raw image, extfs test use make_extfs Signed-off-by: yuchen.cc --- src/overlaybd/extfs/test/test.cpp | 19 +++++++++++-------- src/tools/overlaybd-create.cpp | 10 ++++++++-- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/overlaybd/extfs/test/test.cpp b/src/overlaybd/extfs/test/test.cpp index a0c8c885..11e5b3dc 100644 --- a/src/overlaybd/extfs/test/test.cpp +++ b/src/overlaybd/extfs/test/test.cpp @@ -269,18 +269,21 @@ int remove_all(photon::fs::IFileSystem *fs, const std::string &path) { photon::fs::IFileSystem *init_extfs() { std::string rootfs = "/tmp/rootfs.img"; - // mkfs - std::string cmd = "mkfs.ext4 -F -b 4096 " + rootfs + " 100M"; - auto ret = system(cmd.c_str()); - if (ret != 0) { - LOG_ERRNO_RETURN(0, nullptr, "failed mkfs"); - } - // new extfs - auto image_file = photon::fs::open_localfile_adaptor(rootfs.c_str(), O_RDWR, 0644, 0); + auto image_file = photon::fs::open_localfile_adaptor(rootfs.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0644, 0); if (!image_file) { LOG_ERRNO_RETURN(0, nullptr, "failed to open `", 
rootfs); } + if (image_file->ftruncate(100 << 20) < 0) { + delete image_file; + LOG_ERRNO_RETURN(0, nullptr, "failed to truncate image to 100M"); + } + + if (make_extfs(image_file) < 0) { + delete image_file; + LOG_ERRNO_RETURN(0, nullptr, "failed to mkfs"); + } + photon::fs::IFileSystem *extfs = new_extfs(image_file); if (!extfs) { delete image_file; diff --git a/src/tools/overlaybd-create.cpp b/src/tools/overlaybd-create.cpp index 7e773e41..63c6997c 100644 --- a/src/tools/overlaybd-create.cpp +++ b/src/tools/overlaybd-create.cpp @@ -53,6 +53,7 @@ int main(int argc, char **argv) { bool build_turboOCI = false; bool build_fastoci = false; bool mkfs = false; + bool raw = false; bool verbose = false; CLI::App app{"this is overlaybd-create"}; @@ -60,6 +61,7 @@ int main(int argc, char **argv) { app.add_flag("-s", sparse, "create sparse RW layer")->default_val(false); app.add_flag("--turboOCI", build_turboOCI, "commit using turboOCI format")->default_val(false); app.add_flag("--fastoci", build_fastoci, "commit using turboOCI format(depracated)")->default_val(false); + app.add_flag("--raw", raw, "create raw image")->default_val(false); app.add_flag("--mkfs", mkfs, "mkfs after create")->default_val(false); app.add_option("data_file", data_file_path, "data file path")->type_name("FILEPATH")->required(); app.add_option("index_file", index_file_path, "index file path")->type_name("FILEPATH")->required(); @@ -80,7 +82,10 @@ int main(int argc, char **argv) { IFile* findex = open_file(index_file_path.c_str(), flag, mode); IFile* file = nullptr; - if (build_turboOCI) { + if (raw) { + file = fdata; + file->ftruncate(vsize); + } else if (build_turboOCI) { LSMT::WarpFileArgs args(findex, fdata, nullptr); args.virtual_size = vsize; file = LSMT::create_warpfile(args, false); @@ -104,7 +109,8 @@ int main(int argc, char **argv) { } } - delete file; + if (file && file != fdata) + delete file; delete fdata; delete findex; printf("overlaybd-create has created files SUCCESSFULLY\n"); From 
aefdee9ea268b27cd086ff3110d37506496e3e0a Mon Sep 17 00:00:00 2001 From: Lanzheng Liu Date: Fri, 10 Nov 2023 17:25:00 +0800 Subject: [PATCH 21/31] Add support for vsize = 0 to overlaybd-create indicating that the vsize is inherited from the lower layers. Signed-off-by: Lanzheng Liu --- src/overlaybd/lsmt/file.cpp | 57 ++++++++++++++-------------------- src/tools/overlaybd-create.cpp | 2 +- 2 files changed, 25 insertions(+), 34 deletions(-) diff --git a/src/overlaybd/lsmt/file.cpp b/src/overlaybd/lsmt/file.cpp index f76004d4..a36246f6 100644 --- a/src/overlaybd/lsmt/file.cpp +++ b/src/overlaybd/lsmt/file.cpp @@ -1570,8 +1570,7 @@ void *do_parallel_load_index(void *param) { return NULL; } -static IMemoryIndex *load_merge_index(vector &files, vector &uuid, - HeaderTrailer &ht) { +static IMemoryIndex *load_merge_index(vector &files, vector &uuid, uint64_t &vsize) { photon::join_handle *ths[PARALLEL_LOAD_INDEX]; auto n = min(PARALLEL_LOAD_INDEX, (int)files.size()); LOG_DEBUG("create ` photon threads to merge index", n); @@ -1590,7 +1589,13 @@ static IMemoryIndex *load_merge_index(vector &files, vector &uuid uuid[i].parse(job->ht.uuid); } assert(tm.jobs.back().i == files.size() - 1); - ht = tm.jobs.back().ht; + for (auto it = tm.jobs.rbegin(); it != tm.jobs.rend(); ++it) { + if ((*it).ht.virtual_size > 0) { + vsize = (*it).ht.virtual_size; + break; + } + } + std::reverse(files.begin(), files.end()); std::reverse(tm.indexes.begin(), tm.indexes.end()); std::reverse(uuid.begin(), uuid.end()); @@ -1598,7 +1603,6 @@ static IMemoryIndex *load_merge_index(vector &files, vector &uuid if (!pmi) LOG_ERROR_RETURN(0, nullptr, "failed to merge indexes"); return pmi; - // all indexes will be deleted automatically by ptr_vector } IFileRO *open_files_ro(IFile **files, size_t n, bool ownership) { @@ -1608,10 +1612,10 @@ IFileRO *open_files_ro(IFile **files, size_t n, bool ownership) { if (!files || n == 0) return nullptr; - HeaderTrailer ht; + uint64_t vsize; vector m_files(files, files + 
n); vector m_uuid(n); - auto pmi = load_merge_index(m_files, m_uuid, ht); + auto pmi = load_merge_index(m_files, m_uuid, vsize); if (!pmi) return nullptr; @@ -1619,7 +1623,7 @@ IFileRO *open_files_ro(IFile **files, size_t n, bool ownership) { rst->m_index = pmi; rst->m_files = move(m_files); rst->m_uuid = move(m_uuid); - rst->m_vsize = ht.virtual_size; + rst->m_vsize = vsize; rst->m_file_ownership = ownership; LOG_DEBUG("open ` layers", n); @@ -1630,9 +1634,9 @@ IFileRO *open_files_ro(IFile **files, size_t n, bool ownership) { } int merge_files_ro(vector files, const CommitArgs &args) { - HeaderTrailer ht; + uint64_t vsize; vector files_uuid(files.size()); - auto pmi = unique_ptr(load_merge_index(files, files_uuid, ht)); + auto pmi = unique_ptr(load_merge_index(files, files_uuid, vsize)); if (!pmi) return -1; @@ -1641,7 +1645,7 @@ int merge_files_ro(vector files, const CommitArgs &args) { unique_ptr DISCARD_BLOCK(new char[ALIGNMENT]); atomic_uint64_t _no_use_var(0); - CompactOptions opts(&files, ri.get(), pmi->size(), ht.virtual_size, &args); + CompactOptions opts(&files, ri.get(), pmi->size(), vsize, &args); int ret = compact(opts, _no_use_var); return ret; } @@ -1682,7 +1686,7 @@ IFileRW *stack_files(IFileRW *upper_layer, IFileRO *lower_layers, bool ownership bool check_order) { auto u = (LSMTFile *)upper_layer; auto l = (LSMTReadOnlyFile *)lower_layers; - if (!u /*|| u->m_files.size() != 1*/) + if (!u) LOG_ERROR_RETURN(EINVAL, nullptr, "invalid upper layer"); if (!l) return upper_layer; @@ -1698,33 +1702,22 @@ IFileRW *stack_files(IFileRW *upper_layer, IFileRO *lower_layers, bool ownership } if (!pht->is_sparse_rw()) { rst = new LSMTFile; + if (u->m_vsize == 0) { + u->m_vsize = l->m_vsize; + LOG_INFO("update upper vsize as lower vsize, vsize:`", u->m_vsize); + } } else { rst = new LSMTSparseFile; + if (u->m_vsize == 0) { + u->m_vsize = l->m_vsize; + u->m_files[0]->ftruncate(u->m_vsize + HeaderTrailer::SPACE); + LOG_INFO("update upper vsize as lower vsize and 
truncate upper sparse file, vsize:`", u->m_vsize); + } } } else { rst = new LSMTWarpFile; delta++; } - // rst->m_index = idx; - // rst->m_findex = u->m_findex; - // rst->m_vsize = u->m_vsize; - // rst->m_file_ownership = ownership; - // rst->m_files.reserve(2 + l->m_files.size()); - // rst->m_uuid.reserve(1 + l->m_uuid.size()); - // for (auto &x : l->m_files) - // rst->m_files.push_back(x); - // for (auto &x : l->m_uuid) - // rst->m_uuid.push_back(x); - // rst->m_files.push_back(u->m_files[1]); - // rst->m_uuid.push_back(u->m_uuid[0]); - // u->m_index = l->m_index = nullptr; - // rst->m_rw_tag = rst->m_files.size() - 2; - // l->m_file_ownership = u->m_file_ownership = false; - // if (ownership) { - // delete u; - // delete l; - // } - // return rst; idx = create_combo_index((IMemoryIndex0 *)u->m_index, l->m_index, l->m_files.size(), true); rst->m_index = idx; rst->m_findex = u->m_findex; @@ -1741,8 +1734,6 @@ IFileRW *stack_files(IFileRW *upper_layer, IFileRO *lower_layers, bool ownership if (verify_order(rst->m_files, rst->m_uuid, 1) == false) return nullptr; LOG_INFO("check layer's parent uuid success."); - } else { - LOG_WARN("STACK FILES WITHOUT CHECK ORDER!!!"); } rst->m_files.push_back(u->m_files[0]); if (type == (uint8_t)LSMTFileType::WarpFile) { diff --git a/src/tools/overlaybd-create.cpp b/src/tools/overlaybd-create.cpp index 63c6997c..51d6221b 100644 --- a/src/tools/overlaybd-create.cpp +++ b/src/tools/overlaybd-create.cpp @@ -65,7 +65,7 @@ int main(int argc, char **argv) { app.add_flag("--mkfs", mkfs, "mkfs after create")->default_val(false); app.add_option("data_file", data_file_path, "data file path")->type_name("FILEPATH")->required(); app.add_option("index_file", index_file_path, "index file path")->type_name("FILEPATH")->required(); - app.add_option("vsize", vsize, "virtual size(GB)")->type_name("INT")->check(CLI::PositiveNumber)->required(); + app.add_option("vsize", vsize, "virtual 
size(GB)")->type_name("INT")->check(CLI::NonNegativeNumber)->required(); app.add_flag("--verbose", verbose, "output debug info")->default_val(false); CLI11_PARSE(app, argc, argv); From 923ed1019aa24bc63bec2d5c7dce01222b40a0fc Mon Sep 17 00:00:00 2001 From: liulanzheng Date: Tue, 14 Nov 2023 17:34:43 +0800 Subject: [PATCH 22/31] registry: enhanced error handling Signed-off-by: liulanzheng --- src/overlaybd/registryfs/registryfs_v2.cpp | 36 ++++++++++++---------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/src/overlaybd/registryfs/registryfs_v2.cpp b/src/overlaybd/registryfs/registryfs_v2.cpp index 5e783714..70abac7d 100644 --- a/src/overlaybd/registryfs/registryfs_v2.cpp +++ b/src/overlaybd/registryfs/registryfs_v2.cpp @@ -152,8 +152,8 @@ class RegistryFSImpl_v2 : public RegistryFS { op.retry = 0; op.timeout = tmo.timeout(); m_client->call(&op); - - if (op.status_code == 200 || op.status_code == 206) { + ret = op.status_code; + if (ret == 200 || ret == 206) { m_url_info.release(url); return ret; } @@ -212,17 +212,18 @@ class RegistryFSImpl_v2 : public RegistryFS { op.req.headers.value_append(*token); op.timeout = tmo.timeout(); op.call(); - if (op.status_code == 401 || op.status_code == 403) { + code = op.status_code; + if (code == 401 || code == 403) { LOG_WARN("Token invalid, try refresh password next time"); } - if (300 <= op.status_code && op.status_code < 400) { + if (300 <= code && code < 400) { // pass auth, redirect to source auto location = op.resp.headers["Location"]; if (!scope.empty()) m_scope_token.release(scope); return new UrlInfo{UrlMode::Redirect, location}; } - if (op.status_code == 200) { + if (code == 200) { UrlInfo *info = new UrlInfo{UrlMode::Self, ""}; if (token && !token->empty()) info->info = kBearerAuthPrefix + *token; @@ -234,7 +235,7 @@ class RegistryFSImpl_v2 : public RegistryFS { // unexpected situation if (!scope.empty()) m_scope_token.release(scope, true); - LOG_ERROR_RETURN(0, nullptr, "Failed to get actual url, 
status_code=` ", op.status_code, VALUE(url)); + LOG_ERROR_RETURN(0, nullptr, "Failed to get actual url, status_code=` ", code, VALUE(url)); } virtual int setAccelerateAddress(const char* addr = "") override { @@ -403,15 +404,15 @@ class RegistryFileImpl_v2 : public photon::fs::VirtualReadOnlyFile { LOG_DEBUG("pulling blob from registry: ", VALUE(m_url), VALUE(offset), VALUE(count)); HTTP_OP op; - auto ret = m_fs->get_data(m_url, offset, count, tmo.timeout(), op); - if (op.status_code != 200 && op.status_code != 206) { + auto code = m_fs->get_data(m_url, offset, count, tmo.timeout(), op); + if (code != 200 && code != 206) { ERRNO eno; if (tmo.expire() < photon::now) { LOG_ERROR_RETURN(ETIMEDOUT, -1, "timed out in preadv ", VALUE(m_url), VALUE(offset)); } if (retry--) { - LOG_WARN("failed to perform HTTP GET, going to retry ", VALUE(op.status_code), VALUE(offset), - VALUE(count), VALUE(ret), eno); + LOG_WARN("failed to perform HTTP GET, going to retry ", VALUE(code), VALUE(offset), + VALUE(count), eno); photon::thread_usleep(1000); goto again; } else { @@ -427,18 +428,19 @@ class RegistryFileImpl_v2 : public photon::fs::VirtualReadOnlyFile { int retry = 3; again: HTTP_OP op; - auto ret = m_fs->get_data(m_url, 0, 1, tmo.timeout(), op); - if (op.status_code != 200 && op.status_code != 206) { + auto code = m_fs->get_data(m_url, 0, 1, tmo.timeout(), op); + if (code != 200 && code != 206) { if (tmo.expire() < photon::now) LOG_ERROR_RETURN(ETIMEDOUT, -1, "get meta timedout"); - - if (op.status_code == 401 || op.status_code == 403) { - if (retry--) + if (retry--) goto again; + if (code == 401 || code == 403) { LOG_ERROR_RETURN(EPERM, -1, "Authorization failed"); + } else if (code == 404) { + LOG_ERROR_RETURN(ENOENT, -1, "No such file or directory"); + } else if (code == 429) { + LOG_ERROR_RETURN(EBUSY, -1, "Too many request"); } - if (retry--) - goto again; LOG_ERROR_RETURN(ENOENT, -1, "failed to get meta from server"); } return op.resp.resource_size(); From 
930a1f3dfb7c94e402b9967939b0bf3e9eb68106 Mon Sep 17 00:00:00 2001 From: Lanzheng Liu Date: Wed, 15 Nov 2023 22:44:57 +0800 Subject: [PATCH 23/31] update vsize when lsmt stack files if needed Signed-off-by: Lanzheng Liu --- src/overlaybd/lsmt/file.cpp | 51 +++++++++++++++++++++++++++++++------ src/overlaybd/lsmt/file.h | 4 ++- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/src/overlaybd/lsmt/file.cpp b/src/overlaybd/lsmt/file.cpp index a36246f6..6371f886 100644 --- a/src/overlaybd/lsmt/file.cpp +++ b/src/overlaybd/lsmt/file.cpp @@ -603,6 +603,8 @@ class LSMTReadOnlyFile : public IFileRW { LOG_ERROR_RETURN(ENOSYS, nullptr, "no underlying files found!"); return file->filesystem(); } + + UNIMPLEMENTED(int update_vsize(size_t vsize) override); UNIMPLEMENTED(int close_seal(IFileRO **reopen_as = nullptr) override); // It can commit a RO file after close_seal() @@ -834,6 +836,31 @@ class LSMTFile : public LSMTReadOnlyFile { return 0; } + int update_header_vsize(IFile *file, size_t vsize) { + ALIGNED_MEM(buf, HeaderTrailer::SPACE, ALIGNMENT4K) + if (file->pread(buf, HeaderTrailer::SPACE, 0) != HeaderTrailer::SPACE) { + LOG_ERROR_RETURN(0, -1, "read layer header failed."); + } + HeaderTrailer *ht = (HeaderTrailer *)buf; + ht->virtual_size = vsize; + if (file->pwrite(buf, HeaderTrailer::SPACE, 0) != HeaderTrailer::SPACE) { + LOG_ERROR_RETURN(0, -1, "write layer header failed."); + } + return 0; + } + + virtual int update_vsize(size_t vsize) override { + LOG_INFO("update vsize for LSMTFile ", VALUE(vsize)); + m_vsize = vsize; + if (update_header_vsize(m_files[m_rw_tag], vsize) < 0) { + LOG_ERROR_RETURN(0, -1, "failed to update data vsize"); + } + if (update_header_vsize(m_findex, vsize) < 0) { + LOG_ERROR_RETURN(0, -1, "failed to update index vsize"); + } + return 0; + } + virtual int commit(const CommitArgs &args) const override { if (m_files.size() > 1) { LOG_ERROR_RETURN(ENOTSUP, -1, "not supported: commit stacked files"); @@ -1011,6 +1038,16 @@ class 
LSMTSparseFile : public LSMTFile { LOG_INFO("segment size: `", mappings.size()); return 0; } + + virtual int update_vsize(size_t vsize) override { + LOG_INFO("update vsize for LSMTSparseFile ", VALUE(vsize)); + m_vsize = vsize; + if (update_header_vsize(m_files[m_rw_tag], vsize) < 0) { + LOG_ERROR_RETURN(0, -1, "failed to update data vsize"); + } + m_files[m_rw_tag]->ftruncate(vsize + HeaderTrailer::SPACE); + return 0; + } }; class LSMTWarpFile : public LSMTFile { @@ -1690,6 +1727,7 @@ IFileRW *stack_files(IFileRW *upper_layer, IFileRO *lower_layers, bool ownership LOG_ERROR_RETURN(EINVAL, nullptr, "invalid upper layer"); if (!l) return upper_layer; + auto type = u->ioctl(IFileRO::GetType); LSMTFile *rst = nullptr; IComboIndex *idx = nullptr; @@ -1702,16 +1740,13 @@ IFileRW *stack_files(IFileRW *upper_layer, IFileRO *lower_layers, bool ownership } if (!pht->is_sparse_rw()) { rst = new LSMTFile; - if (u->m_vsize == 0) { - u->m_vsize = l->m_vsize; - LOG_INFO("update upper vsize as lower vsize, vsize:`", u->m_vsize); - } } else { rst = new LSMTSparseFile; - if (u->m_vsize == 0) { - u->m_vsize = l->m_vsize; - u->m_files[0]->ftruncate(u->m_vsize + HeaderTrailer::SPACE); - LOG_INFO("update upper vsize as lower vsize and truncate upper sparse file, vsize:`", u->m_vsize); + } + // TODO: also for LSMTWarpFile + if (u->m_vsize == 0) { + if (u->update_vsize(l->m_vsize) < 0) { + LOG_ERRNO_RETURN(0, nullptr, "failed to update vsize"); } } } else { diff --git a/src/overlaybd/lsmt/file.h b/src/overlaybd/lsmt/file.h index 2d3aa096..51bbcf16 100644 --- a/src/overlaybd/lsmt/file.h +++ b/src/overlaybd/lsmt/file.h @@ -74,10 +74,12 @@ class IFileRW : public IFileRO { return this->ioctl(Index_Group_Commit, buffer_size); } + // update vsize for current rw layer + virtual int update_vsize(size_t vsize) = 0; + // commit the written content as a new file, without garbages // return 0 for success, -1 otherwise virtual int commit(const CommitArgs &args) const = 0; - // virtual int 
commit(IFile* as) const = 0; // close and seal current file, optionally returning a new // read-only file, with ownership of underlaying file transferred From 4ab44656f9d9324e585f60a9401ff43ef26f9e42 Mon Sep 17 00:00:00 2001 From: liulanzheng Date: Fri, 17 Nov 2023 12:15:04 +0800 Subject: [PATCH 24/31] add retry for zfile decompress and fix Signed-off-by: liulanzheng --- src/overlaybd/zfile/zfile.cpp | 54 +++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/src/overlaybd/zfile/zfile.cpp b/src/overlaybd/zfile/zfile.cpp index 43f99946..e0ba88cd 100644 --- a/src/overlaybd/zfile/zfile.cpp +++ b/src/overlaybd/zfile/zfile.cpp @@ -311,8 +311,8 @@ class CompressionFile : public VirtualReadOnlyFile { auto readn = m_zfile->m_file->pread(m_buf, read_size, begin_offset); if (readn != (ssize_t)read_size) { m_eno = (errno ? errno : EIO); - LOG_ERRNO_RETURN(0, -1, "read compressed blocks failed. (offset: `, len: `)", - begin_offset, read_size); + LOG_ERRNO_RETURN(0, -1, "read compressed blocks failed. 
(offset: `, len: `, ret: `)", + begin_offset, read_size, readn); } return 0; } @@ -466,29 +466,29 @@ class CompressionFile : public VirtualReadOnlyFile { LOG_ERROR_RETURN(ENOMEM, -1, "block_size: ` > MAX_READ_SIZE (`)", m_ht.opt.block_size, MAX_READ_SIZE); } - if (offset + count > m_ht.original_file_size) { - LOG_WARN("the read range exceeds raw_file_size.(`>`)", count + offset, + ssize_t cnt = count; + if (offset + cnt > (ssize_t)m_ht.original_file_size) { + LOG_WARN("the read range exceeds raw_file_size.(`>`)", cnt + offset, m_ht.original_file_size); - count = m_ht.original_file_size - offset; + cnt = m_ht.original_file_size - offset; } - if (count <= 0) + if (cnt <= 0) { + LOG_WARN("the read offset exceeds raw_file_size.(`>`)", offset, + m_ht.original_file_size); return 0; - if (offset + count > m_ht.original_file_size) { - LOG_ERRNO_RETURN(ERANGE, -1, "pread range exceed (` > `)", offset + count, - m_ht.original_file_size); } ssize_t readn = 0; // final will equal to count unsigned char raw[MAX_READ_SIZE]; - BlockReader br(this, offset, count); + BlockReader br(this, offset, cnt); for (auto &block : br) { if (buf == nullptr) { // used for prefetch; no copy, no decompress; readn += block.cp_len; continue; } + int retry = 3; + again: if (m_ht.opt.verify) { - int retry = 2; - again: auto c = crc32c((void *)block.buffer(), block.compressed_size); if (c != block.crc32_code()) { if ((valid == FLAG_VALID_TRUE) && (retry--)) { @@ -515,17 +515,29 @@ class CompressionFile : public VirtualReadOnlyFile { readn += block.cp_len; continue; } + int dret = -1; if (block.cp_len == m_ht.opt.block_size) { - auto dret = m_compressor->decompress(block.buffer(), block.compressed_size, - (unsigned char *)buf, m_ht.opt.block_size); - if (dret == -1) - return -1; + dret = m_compressor->decompress(block.buffer(), block.compressed_size, + (unsigned char *)buf, m_ht.opt.block_size); } else { - auto dret = m_compressor->decompress(block.buffer(), block.compressed_size, raw, - 
m_ht.opt.block_size); - if (dret == -1) - return -1; - memcpy(buf, raw + block.cp_begin, block.cp_len); + dret = m_compressor->decompress(block.buffer(), block.compressed_size, raw, + m_ht.opt.block_size); + if (dret != -1) + memcpy(buf, raw + block.cp_begin, block.cp_len); + } + if (dret == -1) { + if (retry--) { + int reload_res = block.reload(); + LOG_ERROR("decompression failed {offset: `, length: `}, reload result: `", + block.m_reader->m_buf_offset, block.compressed_size, reload_res); + if (reload_res < 0) { + LOG_ERRNO_RETURN(0, -1, "decompression and reload failed"); + } + goto again; + } + LOG_ERRNO_RETURN(0, -1, + "decompression failed after retries, {offset: `, length: `}", + block.m_reader->m_buf_offset, block.compressed_size); } readn += block.cp_len; buf = (unsigned char *)buf + block.cp_len; From 31cb7ac775842053a81e937be310690f09138573 Mon Sep 17 00:00:00 2001 From: "yuchen.cc" Date: Thu, 16 Nov 2023 16:12:03 +0800 Subject: [PATCH 25/31] fix compatible of BUILD_CURL_FROM_SOURCE Signed-off-by: yuchen.cc --- CMake/{FindCURL.cmake => Findcurl.cmake} | 3 ++- CMake/{FindOpenSSL.cmake => Findopenssl.cmake} | 0 CMake/Findphoton.cmake | 4 ++-- src/CMakeLists.txt | 4 ++-- src/overlaybd/cache/ocf_cache/test/CMakeLists.txt | 3 +-- src/overlaybd/registryfs/CMakeLists.txt | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) rename CMake/{FindCURL.cmake => Findcurl.cmake} (97%) rename CMake/{FindOpenSSL.cmake => Findopenssl.cmake} (100%) diff --git a/CMake/FindCURL.cmake b/CMake/Findcurl.cmake similarity index 97% rename from CMake/FindCURL.cmake rename to CMake/Findcurl.cmake index ab4e44b4..de370a54 100644 --- a/CMake/FindCURL.cmake +++ b/CMake/Findcurl.cmake @@ -17,7 +17,7 @@ if(${BUILD_CURL_FROM_SOURCE}) if (NOT curl_bundle_POPULATED) FetchContent_Populate(curl_bundle) endif() - find_package(OpenSSL) + find_package(openssl) add_custom_command( OUTPUT ${curl_bundle_BINARY_DIR}/lib/libcurl.a WORKING_DIRECTORY ${curl_bundle_SOURCE_DIR} @@ -26,6 +26,7 @@ 
if(${BUILD_CURL_FROM_SOURCE}) export CXX=${CMAKE_CXX_COMPILER} && export LD=${CMAKE_LINKER} && export CFLAGS=-fPIC && + export LIBS=-ldl && autoreconf -i && sh configure --with-ssl="${OPENSSL_ROOT_DIR}" --without-libssh2 --enable-static --enable-shared=no --enable-optimize --disable-manual --without-libidn diff --git a/CMake/FindOpenSSL.cmake b/CMake/Findopenssl.cmake similarity index 100% rename from CMake/FindOpenSSL.cmake rename to CMake/Findopenssl.cmake diff --git a/CMake/Findphoton.cmake b/CMake/Findphoton.cmake index 90b7fdf2..4c123220 100644 --- a/CMake/Findphoton.cmake +++ b/CMake/Findphoton.cmake @@ -16,8 +16,8 @@ else() endif() if (BUILD_CURL_FROM_SOURCE) - find_package(OpenSSL REQUIRED) - find_package(CURL REQUIRED) + find_package(openssl REQUIRED) + find_package(curl REQUIRED) add_dependencies(photon_obj CURL::libcurl OpenSSL::SSL OpenSSL::Crypto) endif() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3c8372e4..599b8b5b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,5 @@ -find_package(CURL REQUIRED) -find_package(OpenSSL REQUIRED) +find_package(curl REQUIRED) +find_package(openssl REQUIRED) find_package(aio REQUIRED) find_package(rapidjson REQUIRED) diff --git a/src/overlaybd/cache/ocf_cache/test/CMakeLists.txt b/src/overlaybd/cache/ocf_cache/test/CMakeLists.txt index 51a0668a..6c629cc1 100644 --- a/src/overlaybd/cache/ocf_cache/test/CMakeLists.txt +++ b/src/overlaybd/cache/ocf_cache/test/CMakeLists.txt @@ -1,7 +1,7 @@ include_directories($ENV{GFLAGS}/include) link_directories($ENV{GFLAGS}/lib) -find_package(CURL REQUIRED) +find_package(curl REQUIRED) add_executable(ocf_perf_test ocf_perf_test.cpp) target_include_directories( @@ -19,4 +19,3 @@ add_test( NAME ocf_perf_test COMMAND ${EXECUTABLE_OUTPUT_PATH}/ocf_perf_test --ut_pass=true ) - diff --git a/src/overlaybd/registryfs/CMakeLists.txt b/src/overlaybd/registryfs/CMakeLists.txt index b94525d8..7ce30abe 100644 --- a/src/overlaybd/registryfs/CMakeLists.txt +++ 
b/src/overlaybd/registryfs/CMakeLists.txt @@ -1,6 +1,6 @@ file(GLOB SOURCE_REGISTRYFS "*.cpp") -find_package(CURL REQUIRED) +find_package(curl REQUIRED) add_library(registryfs_lib STATIC ${SOURCE_REGISTRYFS}) target_include_directories(registryfs_lib PUBLIC From 1a3b35f694d8a9fb1f07037f1c04220dd9806a5f Mon Sep 17 00:00:00 2001 From: liulanzheng Date: Fri, 17 Nov 2023 16:21:32 +0800 Subject: [PATCH 26/31] fix sign-compares and lz4 include Signed-off-by: liulanzheng --- src/bk_download.cpp | 4 ++-- src/image_file.cpp | 4 ++-- src/image_service.cpp | 2 +- src/overlaybd/cache/ocf_cache/CMakeLists.txt | 2 +- src/overlaybd/extfs/test/test.cpp | 2 +- src/overlaybd/gzindex/test/test.cpp | 6 +++--- src/overlaybd/zfile/compressor.h | 1 - src/prefetch.cpp | 2 +- src/test/simple_credsrv_test.cpp | 2 +- 9 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/bk_download.cpp b/src/bk_download.cpp index f0690ed1..61179082 100644 --- a/src/bk_download.cpp +++ b/src/bk_download.cpp @@ -195,7 +195,7 @@ bool BkDownload::download_blob() { DEFER(free(buff)); LOG_INFO("download blob start. (`)", url); - while (offset < file_size) { + while (offset < (ssize_t)file_size) { if (running != 1) { LOG_INFO("image file exit when background downloading"); return false; @@ -203,7 +203,7 @@ bool BkDownload::download_blob() { if (!force_download) { // check aleady downloaded. 
auto hole_pos = dst->lseek(offset, SEEK_HOLE); - if (hole_pos >= offset + bs) { + if (hole_pos >= offset + (ssize_t)bs) { // alread downloaded offset += bs; continue; diff --git a/src/image_file.cpp b/src/image_file.cpp index 23e27107..7fa34e29 100644 --- a/src/image_file.cpp +++ b/src/image_file.cpp @@ -341,7 +341,7 @@ LSMT::IFileRO *ImageFile::open_lowers(std::vector &l photon::thread_join(ths[i]); } - for (int i = 0; i < files.size(); i++) { + for (size_t i = 0; i < files.size(); i++) { if (files[i] == NULL) { LOG_ERROR("layer index ` open failed, exit.", i); if (m_exception == "") @@ -367,7 +367,7 @@ LSMT::IFileRO *ImageFile::open_lowers(std::vector &l if (m_exception == "") { m_exception = "failed to create overlaybd device"; } - for (int i = 0; i < lowers.size(); i++) { + for (size_t i = 0; i < lowers.size(); i++) { if (files[i] != NULL) delete files[i]; } diff --git a/src/image_service.cpp b/src/image_service.cpp index 0075b137..d1c6f4a8 100644 --- a/src/image_service.cpp +++ b/src/image_service.cpp @@ -74,7 +74,7 @@ int parse_blob_url(const std::string &url, struct ImageRef &ref) { prev = idx + 1; } ref.seg = std::vector{words[0]}; - for (int i = 2; i + 1 < words.size(); i++) { + for (size_t i = 2; i + 1 < words.size(); i++) { ref.seg.push_back(words[i]); } } diff --git a/src/overlaybd/cache/ocf_cache/CMakeLists.txt b/src/overlaybd/cache/ocf_cache/CMakeLists.txt index 744bd99f..9bdca075 100644 --- a/src/overlaybd/cache/ocf_cache/CMakeLists.txt +++ b/src/overlaybd/cache/ocf_cache/CMakeLists.txt @@ -12,7 +12,7 @@ file(GLOB_RECURSE src_ocf ocf/src/*.c) add_library(ocf_lib STATIC ${src_ocf}) target_include_directories(ocf_lib PUBLIC include/ ease_bindings/env/) target_link_libraries(ocf_lib ocf_env_lib z) -target_compile_options(ocf_lib PUBLIC -Wno-sign-compare) +target_compile_options(ocf_lib PRIVATE -Wno-sign-compare) # ocf_cache_lib file(GLOB src_ocf_cache ocf_cache.cpp ocf_namespace.cpp ease_bindings/*.cpp) diff --git a/src/overlaybd/extfs/test/test.cpp 
b/src/overlaybd/extfs/test/test.cpp index 11e5b3dc..7b2057a0 100644 --- a/src/overlaybd/extfs/test/test.cpp +++ b/src/overlaybd/extfs/test/test.cpp @@ -57,7 +57,7 @@ int write_file(photon::fs::IFile *file) { while (aa.size() < FILE_SIZE) aa.append(bb); auto ret = file->pwrite(aa.data(), aa.size(), 0); - if (ret != aa.size()) { + if (ret != (ssize_t)aa.size()) { LOG_ERRNO_RETURN(0, -1, "failed write file ", VALUE(aa.size()), VALUE(ret)) } LOG_DEBUG("write ` byte", ret); diff --git a/src/overlaybd/gzindex/test/test.cpp b/src/overlaybd/gzindex/test/test.cpp index 2ef535d7..940da52c 100644 --- a/src/overlaybd/gzindex/test/test.cpp +++ b/src/overlaybd/gzindex/test/test.cpp @@ -139,7 +139,7 @@ class GzIndexTest : public ::testing::Test { if (gzdata == nullptr) { LOG_ERRNO_RETURN(0, -1, "failed to create `", fn_gzdata); } - if (gzdata->pwrite(gzbuf, gzlen, 0) != gzlen) { + if (gzdata->pwrite(gzbuf, gzlen, 0) != (ssize_t)gzlen) { LOG_ERRNO_RETURN(0, -1, "failed to pwrite `", fn_gzdata); } return 0; @@ -373,7 +373,7 @@ class GzCacheTest : public ::testing::Test { if (gzdata == nullptr) { LOG_ERRNO_RETURN(0, -1, "failed to create `", fn_gzdata); } - if (gzdata->pwrite(gzbuf, gzlen, 0) != gzlen) { + if (gzdata->pwrite(gzbuf, gzlen, 0) != (ssize_t)gzlen) { LOG_ERRNO_RETURN(0, -1, "failed to pwrite `", fn_gzdata); } return 0; @@ -446,7 +446,7 @@ TEST_F(GzCacheTest, cache_store) { fread(cbuf1, 1, vsize, fp1); fread(cbuf2, 1, vsize, fp2); // refill_size is 1MB - for (int i = 0; i < vsize; i++) { + for (size_t i = 0; i < vsize; i++) { if (check_in_interval(i, 0, 1 << 20) || check_in_interval(i, vsize - (1 << 20), vsize) || check_in_interval(i, 5 << 20, 6 << 20)) { diff --git a/src/overlaybd/zfile/compressor.h b/src/overlaybd/zfile/compressor.h index a56605b6..bb4899f4 100644 --- a/src/overlaybd/zfile/compressor.h +++ b/src/overlaybd/zfile/compressor.h @@ -20,7 +20,6 @@ #include #include #include -#include "lz4/lz4.h" namespace photon { namespace fs { diff --git a/src/prefetch.cpp 
b/src/prefetch.cpp index 2922558c..5facb82c 100644 --- a/src/prefetch.cpp +++ b/src/prefetch.cpp @@ -273,7 +273,7 @@ class PrefetcherImpl : public Prefetcher { // Reload content uint32_t checksum = 0; TraceFormat fmt = {}; - for (int i = 0; i < hdr.data_size / sizeof(TraceFormat); ++i) { + for (size_t i = 0; i < hdr.data_size / sizeof(TraceFormat); ++i) { n_read = m_trace_file->read(&fmt, sizeof(TraceFormat)); if (n_read != sizeof(TraceFormat)) { LOG_ERRNO_RETURN(0, -1, "Prefetch: reload content failed"); diff --git a/src/test/simple_credsrv_test.cpp b/src/test/simple_credsrv_test.cpp index 2ebd47cb..acc33934 100644 --- a/src/test/simple_credsrv_test.cpp +++ b/src/test/simple_credsrv_test.cpp @@ -64,7 +64,7 @@ class SimpleAuthHandler : public HTTPHandler { resp.keep_alive(true); photon::thread_sleep(1); auto ret_w = resp.write((void*)msg.c_str(), msg.size()); - if (ret_w != msg.size()) { + if (ret_w != (ssize_t)msg.size()) { LOG_ERRNO_RETURN(0, -1, "send body failed, target: `, `", req.target(), VALUE(ret_w)); } else { From 2a65d8d804d1475167107aed698b61141e89c2c7 Mon Sep 17 00:00:00 2001 From: Lanzheng Liu Date: Tue, 21 Nov 2023 19:08:07 +0800 Subject: [PATCH 27/31] new cache implementation Co-authored-by: zhuangbowei.zbw Signed-off-by: Lanzheng Liu --- src/image_file.cpp | 65 +- src/image_file.h | 2 +- src/image_service.cpp | 14 +- src/overlaybd/cache/CMakeLists.txt | 6 +- src/overlaybd/cache/cache.cpp | 126 ++-- src/overlaybd/cache/cache.h | 83 +-- src/overlaybd/cache/cached_fs.cpp | 446 ++++++++++++++ src/overlaybd/cache/forwardcfs.h | 64 ++ src/overlaybd/cache/frontend/CMakeLists.txt | 6 - src/overlaybd/cache/frontend/cached_file.cpp | 362 ------------ src/overlaybd/cache/frontend/cached_file.h | 127 ---- src/overlaybd/cache/frontend/cached_fs.cpp | 146 ----- .../cache/full_file_cache/CMakeLists.txt | 3 - .../cache/full_file_cache/cache_pool.cpp | 84 +-- .../cache/full_file_cache/cache_pool.h | 18 +- .../cache/full_file_cache/cache_store.cpp | 33 +- 
.../cache/full_file_cache/cache_store.h | 23 +- .../cache/full_file_cache/test/cache_test.cpp | 388 ------------ src/overlaybd/cache/gzip_cache/cached_fs.cpp | 7 +- .../cache/ocf_cache/test/ocf_perf_test.cpp | 2 +- src/overlaybd/cache/policy/lru.h | 8 +- src/overlaybd/cache/pool_store.h | 289 ++++++--- src/overlaybd/cache/store.cpp | 427 ++++++++++++++ .../{full_file_cache => }/test/CMakeLists.txt | 3 +- src/overlaybd/cache/test/cache_test.cpp | 558 ++++++++++++++++++ .../test/random_generator.h | 57 +- src/overlaybd/gzindex/test/test.cpp | 27 +- src/overlaybd/registryfs/registryfs.cpp | 5 +- src/overlaybd/registryfs/registryfs_v2.cpp | 6 +- src/overlaybd/tar/tar_file.cpp | 2 +- src/overlaybd/zfile/test/test.cpp | 1 - 31 files changed, 2003 insertions(+), 1385 deletions(-) create mode 100644 src/overlaybd/cache/cached_fs.cpp create mode 100644 src/overlaybd/cache/forwardcfs.h delete mode 100644 src/overlaybd/cache/frontend/CMakeLists.txt delete mode 100644 src/overlaybd/cache/frontend/cached_file.cpp delete mode 100644 src/overlaybd/cache/frontend/cached_file.h delete mode 100644 src/overlaybd/cache/frontend/cached_fs.cpp delete mode 100644 src/overlaybd/cache/full_file_cache/test/cache_test.cpp create mode 100644 src/overlaybd/cache/store.cpp rename src/overlaybd/cache/{full_file_cache => }/test/CMakeLists.txt (99%) create mode 100644 src/overlaybd/cache/test/cache_test.cpp rename src/overlaybd/cache/{full_file_cache => }/test/random_generator.h (52%) diff --git a/src/image_file.cpp b/src/image_file.cpp index 7fa34e29..e22da6d2 100644 --- a/src/image_file.cpp +++ b/src/image_file.cpp @@ -128,57 +128,59 @@ IFile *ImageFile::__open_ro_target_remote(const std::string &dir, const std::str return remote_file; } +void get_error_msg(int eno, std::string &err_msg) { + if (eno == EPERM || eno == EACCES) { + err_msg = "Authentication failed"; + } else if (eno == ENOTCONN) { + err_msg = "Connection failed"; + } else if (eno == ETIMEDOUT) { + err_msg = "Get meta timedout"; + 
} else if (eno == ENOENT) { + err_msg = "No such file or directory"; + } else if (eno == EBUSY) { + err_msg = "Too many requests"; + } else if (eno == EIO) { + err_msg = "Unexpected response"; + } else { + err_msg = std::string(strerror(eno)); + } +} + IFile *ImageFile::__open_ro_remote(const std::string &dir, const std::string &digest, const uint64_t size, int layer_index) { - std::string url; - if (conf.repoBlobUrl() == "") { set_failed("empty repoBlobUrl"); LOG_ERROR_RETURN(0, nullptr, "empty repoBlobUrl for remote layer"); } - url = conf.repoBlobUrl(); - - if (url[url.length() - 1] != '/') - url += "/"; - url += digest; + estring url = estring().appends("/", conf.repoBlobUrl(), + (conf.repoBlobUrl().back() != '/') ? "/" : "", + digest); LOG_INFO("open file from remotefs: `, size: `", url, size); IFile *remote_file = image_service.global_fs.remote_fs->open(url.c_str(), O_RDONLY); if (!remote_file) { - std::string err_msg = "failed to open remote file " + url + ": "; - if (errno == EPERM || errno == EACCES) { - err_msg += "Authentication failed"; - } else if (errno == ENOTCONN) { - err_msg += "Connection failed"; - } else if (errno == ETIMEDOUT) { - err_msg += "Get meta timedout"; - } else if (errno == ENOENT) { - err_msg += "No such file or directory"; - } else if (errno == EBUSY) { - err_msg += "Too many requests"; - } else if (errno == EIO) { - err_msg += "Unexpected response"; - } else { - err_msg += std::string(strerror(errno)); - } - set_failed(err_msg); - LOG_ERRNO_RETURN(0, nullptr, err_msg); + std::string err_msg; + get_error_msg(errno, err_msg); + set_failed("failed to open remote file ", url, ": ", err_msg); + LOG_ERRNO_RETURN(0, nullptr, "failed to open remote file `: `", url, err_msg); } remote_file->ioctl(SET_SIZE, size); remote_file->ioctl(SET_LOCAL_DIR, dir); IFile *tar_file = new_tar_file_adaptor(remote_file); if (!tar_file) { - set_failed("failed to open remote file as tar file " + url); + std::string err_msg; + get_error_msg(errno, err_msg); + 
set_failed("failed to open remote file as tar file ", url, ": ", err_msg); delete remote_file; - LOG_ERROR_RETURN(0, nullptr, "failed to open remote file as tar file `", url); + LOG_ERRNO_RETURN(0, nullptr, "failed to open remote file as tar file `: `", url, err_msg); } ISwitchFile *switch_file = new_switch_file(tar_file, false, url.c_str()); if (!switch_file) { - set_failed("failed to open switch file " + url); + set_failed("failed to open switch file ", url); delete tar_file; - LOG_ERROR_RETURN(0, nullptr, "failed to open switch file `", url); + LOG_ERRNO_RETURN(0, nullptr, "failed to open switch file `", url); } if (conf.HasMember("download") && conf.download().enable() == 1) { @@ -517,10 +519,11 @@ void ImageFile::set_auth_failed() { } } -void ImageFile::set_failed(std::string reason) { +template +void ImageFile::set_failed(const Ts&...xs) { if (m_status == 0) // only set exit in image boot phase { m_status = -1; - m_exception = reason; + m_exception = estring().appends(xs...); } } diff --git a/src/image_file.h b/src/image_file.h index df747fba..344487e5 100644 --- a/src/image_file.h +++ b/src/image_file.h @@ -116,7 +116,7 @@ class ImageFile : public photon::fs::ForwardFile { ImageService &image_service; int init_image_file(); - void set_failed(std::string reason); + template void set_failed(const Ts&...xs); LSMT::IFileRO *open_lowers(std::vector &, bool &); LSMT::IFileRW *open_upper(ImageConfigNS::UpperConfig &); IFile *__open_ro_file(const std::string &); diff --git a/src/image_service.cpp b/src/image_service.cpp index d1c6f4a8..09544ca9 100644 --- a/src/image_service.cpp +++ b/src/image_service.cpp @@ -274,8 +274,16 @@ void ImageService::set_result_file(std::string &filename, std::string &data) { data.c_str()); } -static std::string cache_fn_trans_sha256(std::string_view path) { - return std::string(photon::fs::Path(path).basename()); +size_t cache_fn_trans_sha256(void *, std::string_view origin, char *name, size_t namesize) { + auto target = 
photon::fs::Path(origin).basename(); + if (target.size()+2 > namesize) { + // return 0, no name trans, use origin name for cache + LOG_ERROR_RETURN(ERANGE, 0, "name out of range"); + } + name[0] = '/'; + strncpy(name + 1, target.data(), target.size()); + name[target.size()+1] = 0; + return target.size()+1; } bool check_accelerate_url(std::string_view a_url) { @@ -369,7 +377,7 @@ int ImageService::init() { // file cache will delete its src_fs automatically when destructed global_fs.cached_fs = FileSystem::new_full_file_cached_fs( global_fs.srcfs, registry_cache_fs, refill_size, cache_size_GB, 10000000, - (uint64_t)1048576 * 4096, global_fs.io_alloc, cache_fn_trans_sha256); + (uint64_t)1048576 * 1024, global_fs.io_alloc, 0, {nullptr, &cache_fn_trans_sha256}); } else if (cache_type == "ocf") { auto namespace_dir = std::string(cache_dir + "/namespace"); diff --git a/src/overlaybd/cache/CMakeLists.txt b/src/overlaybd/cache/CMakeLists.txt index a193908d..760567be 100644 --- a/src/overlaybd/cache/CMakeLists.txt +++ b/src/overlaybd/cache/CMakeLists.txt @@ -1,4 +1,3 @@ -add_subdirectory(frontend) add_subdirectory(full_file_cache) add_subdirectory(ocf_cache) add_subdirectory(download_cache) @@ -9,7 +8,6 @@ file(GLOB SRC_CACHE "*.cpp") add_library(cache_lib STATIC ${SRC_CACHE}) target_link_libraries(cache_lib photon_static - cache_frontend_lib full_file_cache_lib ocf_cache_lib download_cache_lib @@ -18,3 +16,7 @@ target_link_libraries(cache_lib target_include_directories(cache_lib PUBLIC ${PHOTON_INCLUDE_DIR} ) + +if(BUILD_TESTING) + add_subdirectory(test) +endif() diff --git a/src/overlaybd/cache/cache.cpp b/src/overlaybd/cache/cache.cpp index b2b59cb0..46c2ba09 100644 --- a/src/overlaybd/cache/cache.cpp +++ b/src/overlaybd/cache/cache.cpp @@ -15,95 +15,99 @@ */ #include "cache.h" #include +#include #include #include +#include +#include -#include "frontend/cached_file.h" -#include "pool_store.h" #include "full_file_cache/cache_pool.h" namespace FileSystem { using namespace 
photon::fs; - -ICacheStore::try_preadv_result ICacheStore::try_preadv(const struct iovec *iov, int iovcnt, - off_t offset) { - try_preadv_result rst; - iovector_view view((iovec *)iov, iovcnt); - rst.iov_sum = view.sum(); - auto q = queryRefillRange(offset, rst.iov_sum); - if (q.second == 0) { // no need to refill - rst.refill_size = 0; - rst.size = this->preadv(iov, iovcnt, offset); - } else { - rst.refill_size = q.second; - rst.refill_offset = q.first; - } - return rst; -} -ICacheStore::try_preadv_result ICacheStore::try_preadv_mutable(struct iovec *iov, int iovcnt, - off_t offset) { - return try_preadv(iov, iovcnt, offset); -} -ssize_t ICacheStore::preadv(const struct iovec *iov, int iovcnt, off_t offset) { - SmartCloneIOV<32> ciov(iov, iovcnt); - return preadv_mutable(ciov.iov, iovcnt, offset); -} -ssize_t ICacheStore::preadv_mutable(struct iovec *iov, int iovcnt, off_t offset) { - return preadv(iov, iovcnt, offset); -} -ssize_t ICacheStore::pwritev(const struct iovec *iov, int iovcnt, off_t offset) { - SmartCloneIOV<32> ciov(iov, iovcnt); - return pwritev_mutable(ciov.iov, iovcnt, offset); -} -ssize_t ICacheStore::pwritev_mutable(struct iovec *iov, int iovcnt, off_t offset) { - return pwritev(iov, iovcnt, offset); -} - ICachedFileSystem *new_full_file_cached_fs(IFileSystem *srcFs, IFileSystem *mediaFs, uint64_t refillUnit, uint64_t capacityInGB, uint64_t periodInUs, uint64_t diskAvailInBytes, - IOAlloc *allocator, Fn_trans_func name_trans) { - if (refillUnit % 4096 != 0) { - LOG_ERROR_RETURN(EINVAL, nullptr, "refill Unit need to be aligned to 4KB") + IOAlloc *allocator, int quotaDirLevel, + CacheFnTransFunc fn_trans_func) { + if (refillUnit % 4096 != 0 || !is_power_of_2(refillUnit)) { + LOG_ERROR_RETURN(EINVAL, nullptr, "refill Unit need to be aligned to 4KB and power of 2") } if (!allocator) { allocator = new IOAlloc; } Cache::FileCachePool *pool = nullptr; pool = - new ::Cache::FileCachePool(mediaFs, capacityInGB, periodInUs, diskAvailInBytes, refillUnit, 
name_trans); + new ::Cache::FileCachePool(mediaFs, capacityInGB, periodInUs, diskAvailInBytes, refillUnit); pool->Init(); - return new_cached_fs(srcFs, pool, 4096, refillUnit, allocator); + return new_cached_fs(srcFs, pool, 4096, allocator, fn_trans_func); +} + +using OC = ObjectCache; +ICachePool::ICachePool(uint32_t pool_size, uint32_t max_refilling, uint32_t refilling_threshold) + : m_stores(new OC(10UL * 1000 * 1000)), m_max_refilling(max_refilling), + m_refilling_threshold(refilling_threshold) { + if (pool_size != 0) { + m_thread_pool = photon::new_thread_pool(pool_size, 128 * 1024UL); + m_vcpu = photon::get_vcpu(); + }; +} + +#define cast(x) static_cast(x) +ICachePool::~ICachePool() { + stores_clear(); + delete cast(m_stores); +} + +void ICachePool::stores_clear() { + if (m_thread_pool) { + auto pool = static_cast(m_thread_pool); + m_thread_pool = nullptr; + photon::delete_thread_pool(pool); + } + cast(m_stores)->clear(); } ICacheStore *ICachePool::open(std::string_view filename, int flags, mode_t mode) { - ICacheStore *cache_store = nullptr; - auto it = m_stores.find(filename); - if (it != m_stores.end()) - cache_store = it->second; - if (cache_store == nullptr) { - cache_store = this->do_open(filename, flags, mode); + char store_name[4096]; + std::string x(filename); + auto len = this->fn_trans_func(filename, store_name, sizeof(store_name)); + std::string_view store_sv = len ? 
std::string_view(store_name, len) : filename; + auto ctor = [&]() -> ICacheStore * { + auto cache_store = this->do_open(store_sv, flags, mode); if (nullptr == cache_store) { LOG_ERRNO_RETURN(0, nullptr, "fileCachePool_ open file failed, name : `", filename.data()); } - m_stores.emplace(filename, cache_store); - auto it = m_stores.find(filename); - std::string_view map_key = it->first; - cache_store->set_pathname(map_key); + auto it = cast(m_stores)->find(store_sv); + std::string_view map_key = (*it)->key(); + cache_store->set_store_key(map_key); + cache_store->set_src_name(filename); cache_store->set_pool(this); + struct stat st; + SET_STRUCT_STAT(&st); + st.st_size = -1; + if (cache_store->fstat(&st) == 0) { + cache_store->set_cached_size(st.st_size); + cache_store->set_actual_size(st.st_size); + } + cache_store->set_open_flags(flags); + return cache_store; + }; + auto store = cast(m_stores)->acquire(store_sv, ctor); + if (store) { + auto cnt = store->ref_.fetch_add(1, std::memory_order_relaxed); + if (cnt) + cast(m_stores)->release(store_sv); } - cache_store->add_ref(); - return cache_store; + return store; +} + +void ICachePool::set_trans_func(CacheFnTransFunc fn_trans_func) { + this->fn_trans_func = fn_trans_func; } int ICachePool::store_release(ICacheStore *store) { - auto iter = m_stores.find(store->get_pathname()); - if (iter == m_stores.end()) { - LOG_ERROR_RETURN(0, -1, "try to erase an unexist store from map m_stores , name : `", - store->get_pathname().data()); - } - m_stores.erase(iter); - return 0; + return cast(m_stores)->release(store->get_store_key()); } } // namespace FileSystem diff --git a/src/overlaybd/cache/cache.h b/src/overlaybd/cache/cache.h index 243f8569..06b8b38b 100644 --- a/src/overlaybd/cache/cache.h +++ b/src/overlaybd/cache/cache.h @@ -14,32 +14,59 @@ limitations under the License. 
*/ #pragma once -#include +#include #include #include +#include #include #include "pool_store.h" +#define O_WRITE_THROUGH 0x01000000 // write backing store and cache +#define O_WRITE_AROUND 0x02000000 // write backing store only, default +#define O_WRITE_BACK 0x04000000 // write cache and async flush to backing store, not support yet +#define O_CACHE_ONLY 0x08000000 // write cache only +#define O_DIRECT_LOCAL 0x20000000 // read local +#define O_MMAP_READ 0x00800000 // mmap like read + +#define RW_V2_HIGH_PRIORITY 0x00000001 // preadv2/pwritev2 high priority cache data +#define RW_V2_PROMOTE 0x00000002 // preadv2 promote flag +#define RW_V2_CACHE_ONLY 0x00000004 // preadv2 cache only flag +#define RW_V2_TO_BUFFER_WITHOUT_SYNC \ + 0x00000010 // pwritev2 to buffered accessor file's buffer without sync +#define RW_V2_MEMORY_ONLY 0x00000020 // pwritev2 memory cache only + +#define IS_STRUCT_STAT_SETTED(x) ((*(uint64_t *)x) == 0xF19A336DB7CA28E7ull) +#define SET_STRUCT_STAT(x) ((*(uint64_t *)x) = 0xF19A336DB7CA28E7ull) + +const int IOCTL_GET_PAGE_SIZE = 161; + +namespace Cache { +namespace Block { +struct Options; +} +} // namespace Cache struct IOAlloc; namespace FileSystem { class ICachedFileSystem : public photon::fs::IFileSystem { public: // get the source file system - UNIMPLEMENTED_POINTER(photon::fs::IFileSystem *get_source()); + UNIMPLEMENTED_POINTER(IFileSystem *get_source()); // set the source file system - UNIMPLEMENTED(int set_source(photon::fs::IFileSystem *src)); + UNIMPLEMENTED(int set_source(IFileSystem *src)); UNIMPLEMENTED_POINTER(ICachePool *get_pool()); + + UNIMPLEMENTED(int set_pool(ICachePool *pool)); }; class ICachedFile : public photon::fs::IFile { public: // get the source file system - UNIMPLEMENTED_POINTER(photon::fs::IFile *get_source()); + UNIMPLEMENTED_POINTER(IFile *get_source()); // set the source file system, and enable `auto_refill` - UNIMPLEMENTED(int set_source(photon::fs::IFile *src)); + UNIMPLEMENTED(int set_source(IFile *src)); 
UNIMPLEMENTED_POINTER(ICacheStore *get_store()); @@ -55,15 +82,9 @@ class ICachedFile : public photon::fs::IFile { return pwritev(iov, iovcnt, offset); } - // refilling a range without providing data, is treated as prefeching + // refilling a range without providing data, is treated as prefetching ssize_t refill(off_t offset, size_t count) { - return prefetch(offset, count); - } - - // prefeching a range is implemented as reading the range without a buffer - ssize_t prefetch(off_t offset, size_t count) { - iovec iov{nullptr, count}; - return preadv(&iov, 1, offset); + return fadvise(offset, count, POSIX_FADV_WILLNEED); } // query cached extents is implemented as fiemap() @@ -82,38 +103,22 @@ class ICachedFile : public photon::fs::IFile { } }; -class IMemCachedFile : public ICachedFile { -public: - // Get the internal buffer for the specified LBA range (usually aligned), - // which will remain valid for user until released by unpin_buffer(). - // Will allocate pages for missed ranges. - // Will refill / fetch / load data from source if `refill`. - // Concurrent R/W to a same range are guaranteed to work, but considered - // a race-condition and the result is undefiend. - // returns # of bytes actually got, or <0 for failures - virtual ssize_t pin_buffer(off_t offset, size_t count, bool refill, /*OUT*/ iovector *iov) = 0; - - // Release buffers got from pin_buffer(), - // and the buffer is no longer valid for user. 
- // return 0 for success, < 0 for failures - virtual int unpin_buffer(off_t offset, const iovector *iov) = 0; -}; - extern "C" { ICachedFileSystem *new_cached_fs(photon::fs::IFileSystem *src, ICachePool *pool, uint64_t pageSize, - uint64_t refillUnit, IOAlloc *allocator); + IOAlloc *allocator, CacheFnTransFunc fn_trans_func = nullptr); + +ICachedFile *new_cached_file(ICacheStore *store, uint64_t pageSize, photon::fs::IFileSystem *fs); -/** Full file cache will automatically delete its media_fs when destructed */ ICachedFileSystem *new_full_file_cached_fs(photon::fs::IFileSystem *srcFs, - photon::fs::IFileSystem *media_fs, - uint64_t refillUnit, uint64_t capacityInGB, - uint64_t periodInUs, uint64_t diskAvailInBytes, - IOAlloc *allocator, - Fn_trans_func name_trans = ICachePool::same_name_trans); + photon::fs::IFileSystem *media_fs, uint64_t refillUnit, + uint64_t capacityInGB, uint64_t periodInUs, + uint64_t diskAvailInBytes, IOAlloc *allocator, + int quotaDirLevel, + CacheFnTransFunc fn_trans_func = nullptr); /** - * @param blk_size The proper size for cache metadata and IO efficiency. - * Large writes to cache media will be split into blk_size. Reads are not affected. + * @param blk_size The proper size for cache metadata and IO efficiency. Large writes to cache media + * will be split into blk_size. Reads and small writes are not affected. * @param prefetch_unit Controls the expand prefetch size from src file. 0 means to disable this * feature. */ diff --git a/src/overlaybd/cache/cached_fs.cpp b/src/overlaybd/cache/cached_fs.cpp new file mode 100644 index 00000000..6d775f9b --- /dev/null +++ b/src/overlaybd/cache/cached_fs.cpp @@ -0,0 +1,446 @@ +/* + Copyright The Overlaybd Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "cache.h" +#include "photon/common/alog.h" +#include "photon/common/io-alloc.h" +#include "photon/common/iovector.h" +#include "photon/common/string_view.h" +#include "photon/fs/filesystem.h" +#include "photon/fs/range-split.h" +#include "pool_store.h" + +namespace Cache { + +using namespace FileSystem; +using namespace photon::fs; + +const uint64_t kMaxPrefetchSize = 32 * 1024 * 1024; + +class CachedFs : public ICachedFileSystem, public IFileSystemXAttr { +public: + CachedFs(IFileSystem *srcFs, ICachePool *fileCachePool, size_t pageSize, IOAlloc *allocator, + CacheFnTransFunc fn_trans_func) + : srcFs_(srcFs), fileCachePool_(fileCachePool), pageSize_(pageSize), allocator_(allocator), + xattrFs_(dynamic_cast(srcFs)) { + fileCachePool_->set_trans_func(fn_trans_func); + } + + ~CachedFs() { + delete fileCachePool_; + } + + IFile *open(const char *pathname, int flags, mode_t mode) override { + int cflags = (flags & (O_WRITE_THROUGH | O_CACHE_ONLY | O_WRITE_BACK)); + auto cache_store = fileCachePool_->open(pathname, O_RDWR | O_CREAT | cflags, 0644); + if (nullptr == cache_store) { + LOG_ERRNO_RETURN(0, nullptr, "fileCachePool_ open file failed, name : `", pathname) + } + + cache_store->set_src_fs(srcFs_); + cache_store->set_page_size(pageSize_); + cache_store->set_allocator(allocator_); + auto ret = new_cached_file(cache_store, pageSize_, this); + if (ret == nullptr) { // if create file is failed + // cache_store must be release, or will leak + cache_store->release(); + } + return ret; + } + + IFile *open(const char *pathname, int flags) override { + 
return open(pathname, flags, 0); // mode and flags are meaningless in RoCacheFS::open(2)(3) + } + + int mkdir(const char *pathname, mode_t mode) override { + return srcFs_ ? srcFs_->mkdir(pathname, mode) : -1; + } + + int rmdir(const char *pathname) override { + return srcFs_ ? srcFs_->rmdir(pathname) : -1; + } + + ssize_t readlink(const char *path, char *buf, size_t bufsiz) override { + return srcFs_ ? srcFs_->readlink(path, buf, bufsiz) : -1; + } + + int rename(const char *oldname, const char *newname) override { + return fileCachePool_->rename(oldname, newname); + } + + int unlink(const char *filename) override { + auto cache_store = fileCachePool_->open(filename, O_RDONLY, 0); + if (cache_store != nullptr) { + cache_store->set_cached_size(0); + cache_store->set_actual_size(0); + cache_store->release(); + } + auto ret = fileCachePool_->evict(filename); + return srcFs_ ? srcFs_->unlink(filename) : ret; + } + + int statfs(const char *path, struct statfs *buf) override { + return srcFs_ ? srcFs_->statfs(path, buf) : -1; + } + int statvfs(const char *path, struct statvfs *buf) override { + return srcFs_ ? srcFs_->statvfs(path, buf) : -1; + } + int stat(const char *path, struct stat *buf) override { + return srcFs_ ? srcFs_->stat(path, buf) : -1; + } + int lstat(const char *path, struct stat *buf) override { + return srcFs_ ? srcFs_->lstat(path, buf) : -1; + } + + int access(const char *pathname, int mode) override { + if (srcFs_) + return srcFs_->access(pathname, mode); + auto cache_store = fileCachePool_->open(pathname, O_RDONLY, 0); + if (cache_store == nullptr) + return -1; + cache_store->release(); + return 0; + } + + DIR *opendir(const char *name) override { + return srcFs_ ? 
srcFs_->opendir(name) : nullptr; + } + + IFileSystem *get_source() override { + return srcFs_; + } + + int set_source(IFileSystem *src) override { + srcFs_ = src; + return 0; + } + + ICachePool *get_pool() override { + return fileCachePool_; + } + + int set_pool(ICachePool *pool) override { + fileCachePool_ = pool; + return 0; + } + + ssize_t getxattr(const char *path, const char *name, void *value, size_t size) override { + return xattrFs_ ? xattrFs_->getxattr(path, name, value, size) : -1; + } + + virtual ssize_t lgetxattr(const char *path, const char *name, void *value, + size_t size) override { + return xattrFs_ ? xattrFs_->lgetxattr(path, name, value, size) : -1; + } + + ssize_t listxattr(const char *path, char *list, size_t size) override { + return xattrFs_ ? xattrFs_->listxattr(path, list, size) : -1; + } + + ssize_t llistxattr(const char *path, char *list, size_t size) override { + return xattrFs_ ? xattrFs_->llistxattr(path, list, size) : -1; + } + + int setxattr(const char *path, const char *name, const void *value, size_t size, + int flags) override { + return xattrFs_ ? xattrFs_->setxattr(path, name, value, size, flags) : -1; + } + + int lsetxattr(const char *path, const char *name, const void *value, size_t size, + int flags) override { + return xattrFs_ ? xattrFs_->lsetxattr(path, name, value, size, flags) : -1; + } + + int removexattr(const char *path, const char *name) override { + return xattrFs_ ? xattrFs_->removexattr(path, name) : -1; + } + + int lremovexattr(const char *path, const char *name) override { + return xattrFs_ ? 
xattrFs_->lremovexattr(path, name) : -1; + } + + UNIMPLEMENTED_POINTER(IFile *creat(const char *pathname, mode_t mode)); + UNIMPLEMENTED(int symlink(const char *oldname, const char *newname)); + UNIMPLEMENTED(int link(const char *oldname, const char *newname)); + UNIMPLEMENTED(int chmod(const char *pathname, mode_t mode)); + UNIMPLEMENTED(int chown(const char *pathname, uid_t owner, gid_t group)); + UNIMPLEMENTED(int lchown(const char *pathname, uid_t owner, gid_t group)); + UNIMPLEMENTED(int truncate(const char *path, off_t length)); + UNIMPLEMENTED(int utime(const char *path, const struct utimbuf *file_times)); + UNIMPLEMENTED(int utimes(const char *path, const struct timeval times[2])); + UNIMPLEMENTED(int lutimes(const char *path, const struct timeval times[2])); + UNIMPLEMENTED(int mknod(const char *path, mode_t mode, dev_t dev)); + UNIMPLEMENTED(int syncfs()); + +private: + IFileSystem *srcFs_; // owned by extern + ICachePool *fileCachePool_; // owned by current class + size_t pageSize_; + + IOAlloc *allocator_; + IFileSystemXAttr *xattrFs_; +}; + +/* + * the procedures of pread are as follows: + * 1. check that the cache is hit(contain unaligned block). + * 2. if hit, just read from cache. + * 3. if not, merge all holes into one read request(offset, size), + * then read missing data from source of file and write it into cache, + * after that read cache' data into user's buffer. 
+ */ +class CachedFile : public ICachedFile, public IFileXAttr { +public: + CachedFile(ICacheStore *cache_store, size_t pageSize, IFileSystem *fs) + : cache_store_(cache_store), pageSize_(pageSize), fs_(fs) { + } + + ~CachedFile() { + cache_store_->release(); + } + + IFileSystem *filesystem() override { + return fs_; + } + + ssize_t pread(void *buf, size_t count, off_t offset) override { + struct iovec v { + buf, count + }; + return preadv(&v, 1, offset); + } + + ssize_t preadv(const struct iovec *iov, int iovcnt, off_t offset) override { + return preadv2(iov, iovcnt, offset, 0); + } + + ssize_t preadv2(const struct iovec *iov, int iovcnt, off_t offset, int flags) override { + return cache_store_->preadv2(iov, iovcnt, offset, flags); + } + + // pwrite* need to be aligned to 4KB for avoiding write padding. + ssize_t pwrite(const void *buf, size_t count, off_t offset) override { + struct iovec v { + const_cast(buf), count + }; + return pwritev(&v, 1, offset); + } + + ssize_t pwritev(const struct iovec *iov, int iovcnt, off_t offset) override { + return pwritev2(iov, iovcnt, offset, 0); + } + + ssize_t pwritev2(const struct iovec *iov, int iovcnt, off_t offset, int flags) override { + return cache_store_->pwritev2(iov, iovcnt, offset, flags); + } + + int fstat(struct stat *buf) override { + DEFER({ buf->st_ino = cache_store_->get_handle(); }); + auto size = cache_store_->get_actual_size(); + if (size % pageSize_ != 0) { + buf->st_size = size; + return 0; + } + IFile *src_file = nullptr; + if (cache_store_->open_src_file(&src_file) != 0) + return -1; + if (src_file) + return src_file->fstat(buf); + return cache_store_->fstat(buf); + } + + int close() override { + return 0; + } + + ssize_t read(void *buf, size_t count) override { + struct iovec v { + buf, count + }; + return readv(&v, 1); + } + + ssize_t readv(const struct iovec *iov, int iovcnt) override { + auto ret = preadv(iov, iovcnt, readOffset_); + if (ret > 0) { + readOffset_ += ret; + } + return ret; + } + + 
ssize_t write(const void *buf, size_t count) override { + struct iovec v { + const_cast(buf), count + }; + return writev(&v, 1); + } + + ssize_t writev(const struct iovec *iov, int iovcnt) override { + auto ret = pwritev(iov, iovcnt, writeOffset_); + if (ret > 0) { + writeOffset_ += ret; + } + return ret; + } + + int query(off_t offset, size_t count) override { + auto ret = cache_store_->queryRefillRange(offset, count); + if (ret.first < 0) + return -1; + return ret.second; + } + + // offset and len must be aligned 4k, otherwise it's useless. + // !!! need ensure no other read operation, otherwise read may read hole data(zero). + int fallocate(int mode, off_t offset, off_t len) override { + if (len == -1) { + return cache_store_->evict(offset, len); + } + range_split rs(offset, len, pageSize_); + auto aligned_offset = rs.aligned_begin_offset(); + auto aligned_len = rs.aligned_length(); + LOG_DEBUG(VALUE(offset), VALUE(len), VALUE(aligned_offset), VALUE(aligned_len)); + return cache_store_->evict(aligned_offset, aligned_len); + } + + int fadvise(off_t offset, off_t len, int advice) override { + if (advice == POSIX_FADV_WILLNEED) { + int ret = prefetch(len, offset, 0); + if (ret < 0) { + LOG_ERROR_RETURN(0, -1, "prefetch read failed"); + } + return 0; + } + LOG_ERRNO_RETURN(ENOSYS, -1, "advice ` is not implemented", advice); + } + + IFile *get_source() override { + IFile *src = nullptr; + if (cache_store_->open_src_file(&src) != 0) + return nullptr; + return src; + } + + inline void get_source_filexattr() { + if (!source_filexattr_) { + auto sfile = get_source(); + source_filexattr_ = dynamic_cast(sfile); + } + } + + // set the source file system, and enable `auto_refill` + int set_source(IFile *src) override { + cache_store_->set_src_file(src); + return 0; + } + + ICacheStore *get_store() override { + return cache_store_; + } + + int ftruncate(off_t length) override { + cache_store_->set_cached_size(length); + cache_store_->set_actual_size(length); + return 0; + } + 
+ std::string_view get_pathname() { + return get_store()->get_src_name(); + } + + ssize_t fgetxattr(const char *name, void *value, size_t size) override { + get_source_filexattr(); + return source_filexattr_ ? source_filexattr_->fgetxattr(name, value, size) : -1; + } + + ssize_t flistxattr(char *list, size_t size) override { + get_source_filexattr(); + return source_filexattr_ ? source_filexattr_->flistxattr(list, size) : -1; + } + + int fsetxattr(const char *name, const void *value, size_t size, int flags) override { + get_source_filexattr(); + return source_filexattr_ ? source_filexattr_->fsetxattr(name, value, size, flags) : -1; + } + + int fremovexattr(const char *name) override { + get_source_filexattr(); + return source_filexattr_ ? source_filexattr_->fremovexattr(name) : -1; + } + + UNIMPLEMENTED(off_t lseek(off_t offset, int whence)); + UNIMPLEMENTED(int fsync()); + UNIMPLEMENTED(int fdatasync()); + UNIMPLEMENTED(int fchmod(mode_t mode)); + UNIMPLEMENTED(int fchown(uid_t owner, gid_t group)); + UNIMPLEMENTED(int fiemap(photon::fs::fiemap *map)); + +protected: + ssize_t prefetch(size_t count, off_t offset, int flags) { + uint64_t end = photon::sat_add(offset, count); + if (offset % pageSize_ != 0) { + offset = offset / pageSize_ * pageSize_; + } + if (end % pageSize_ != 0) { + end = photon::sat_add(end, pageSize_ - 1) / pageSize_ * pageSize_; + } + + uint64_t remain = end - offset; + ssize_t read = 0; + while (remain > 0) { + off_t min = std::min(kMaxPrefetchSize, remain); + remain -= min; + auto ret = cache_store_->try_refill_range(offset, static_cast(min)); + if (ret < 0) { + LOG_ERRNO_RETURN(0, -1, + "try_refill_range failed, ret : `, len : `, offset : `, flags : `", + ret, min, offset, flags); + } + read += ret; + // read end of file. 
+ if (ret < min) { + return read; + } + offset += ret; + } + return read; + } + + ICacheStore *cache_store_; + size_t pageSize_; + IFileSystem *fs_; + + off_t readOffset_ = 0; + off_t writeOffset_ = 0; + IFileXAttr *source_filexattr_ = nullptr; +}; + +} // namespace Cache + +namespace FileSystem { +using namespace photon::fs; +ICachedFileSystem *new_cached_fs(IFileSystem *src, ICachePool *pool, uint64_t pageSize, + IOAlloc *allocator, CacheFnTransFunc fn_trans_func) { + if (!allocator) { + allocator = new IOAlloc; + } + return new ::Cache::CachedFs(src, pool, pageSize, allocator, fn_trans_func); +} + +ICachedFile *new_cached_file(ICacheStore *store, uint64_t pageSize, IFileSystem *fs) { + return new ::Cache::CachedFile(store, pageSize, fs); +} +} // namespace FileSystem diff --git a/src/overlaybd/cache/forwardcfs.h b/src/overlaybd/cache/forwardcfs.h new file mode 100644 index 00000000..e5cf520f --- /dev/null +++ b/src/overlaybd/cache/forwardcfs.h @@ -0,0 +1,64 @@ +/* + Copyright The Overlaybd Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ +#pragma once +#include "cache.h" +#include "photon/fs/forwardfs.h" + +namespace FileSystem { +template +class ForwardCachedFileBase : public IForwardCachedFile { +protected: + using Base = IForwardCachedFile; + using Base::Base; + virtual photon::fs::IFile *get_source() override { + return Base::m_file->get_source(); + } + virtual int set_source(photon::fs::IFile *src) override { + return Base::m_file->set_source(src); + } + virtual ICacheStore *get_store() override { + return Base::m_file->get_store(); + } + virtual int query(off_t offset, size_t count) override { + return Base::m_file->query(offset, count); + } +}; +using ForwardCachedFile = ForwardCachedFileBase>; +using ForwardCachedFile_Ownership = + ForwardCachedFileBase>; + +template +class ForwardCachedFSBase : public IForwardCachedFS { +protected: + using Base = IForwardCachedFS; + using Base::Base; + virtual photon::fs::IFileSystem *get_source() override { + return Base::m_fs->get_source(); + } + virtual int set_source(photon::fs::IFileSystem *src) override { + return Base::m_fs->set_source(src); + } + virtual ICachePool *get_pool() override { + return Base::m_fs->get_pool(); + } + virtual int set_pool(ICachePool *pool) override { + return Base::m_fs->set_pool(pool); + } +}; +using ForwardCachedFS = ForwardCachedFSBase>; +using ForwardCachedFS_Ownership = + ForwardCachedFSBase>; +} // namespace FileSystem diff --git a/src/overlaybd/cache/frontend/CMakeLists.txt b/src/overlaybd/cache/frontend/CMakeLists.txt deleted file mode 100644 index 2fb8e1fb..00000000 --- a/src/overlaybd/cache/frontend/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -file(GLOB SRC_FRONTEND "*.cpp") - -add_library(cache_frontend_lib STATIC ${SRC_FRONTEND}) -target_include_directories(cache_frontend_lib PUBLIC - ${PHOTON_INCLUDE_DIR} -) \ No newline at end of file diff --git a/src/overlaybd/cache/frontend/cached_file.cpp b/src/overlaybd/cache/frontend/cached_file.cpp deleted file mode 100644 index 4a67f8bb..00000000 --- 
a/src/overlaybd/cache/frontend/cached_file.cpp +++ /dev/null @@ -1,362 +0,0 @@ -/* - Copyright The Overlaybd Authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -#include "cached_file.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include "../pool_store.h" - -namespace Cache { - -using namespace photon::fs; - -const off_t kMaxPrefetchSize = 16 * 1024 * 1024; - -CachedFile::CachedFile(IFile *src_file, FileSystem::ICacheStore *cache_store, off_t size, - size_t pageSize, size_t refillUnit, IOAlloc *allocator, IFileSystem *fs) - : src_file_(src_file), cache_store_(cache_store), size_(size), pageSize_(pageSize), - refillUnit_(refillUnit), allocator_(allocator), fs_(fs), readOffset_(0), writeOffset_(0){}; - -CachedFile::~CachedFile() { - cache_store_->release(); - delete src_file_; -} - -IFileSystem *CachedFile::filesystem() { - return fs_; -} - -ssize_t CachedFile::pread(void *buf, size_t count, off_t offset) { - struct iovec v { - buf, count - }; - return preadv(&v, 1, offset); -} - -ssize_t CachedFile::preadv(const struct iovec *iov, int iovcnt, off_t offset) { - if (1 == iovcnt && !iov->iov_base) { - return prefetch(iov->iov_len, offset); - } - return preadvInternal(iov, iovcnt, offset); -} - -ssize_t CachedFile::prefetch(size_t count, off_t offset) { - static char buf[kMaxPrefetchSize * 2] = {0}; - void *alignBuf = align_ptr(buf, pageSize_); - struct iovec iov { - alignBuf, count - }; - - auto end = offset + count; - if (offset % 
pageSize_ != 0) { - offset = offset & ~(pageSize_ - 1); - } - if (end % pageSize_ != 0) { - end = (end + pageSize_ - 1) & ~(pageSize_ - 1); - } - - off_t remain = end - offset; - ssize_t read = 0; - while (remain > 0) { - off_t min = std::min(kMaxPrefetchSize, remain); - remain -= min; - iov.iov_len = min; - auto ret = preadvInternal(&iov, 1, offset); - if (ret < 0) { - LOG_ERRNO_RETURN(0, -1, "preadv failed, ret : `, len : `, offset : `, size_ : `", ret, - min, offset, size_); - } - read += ret; - // read end of file. - if (ret < min) { - return read; - } - offset += ret; - } - return read; -} - -ssize_t CachedFile::preadvInternal(const struct iovec *iov, int iovcnt, off_t offset) { - if (offset < 0) { - LOG_ERROR_RETURN(EINVAL, -1, "offset is invalid, offset : `", offset) - } - - iovector_view view(const_cast(iov), iovcnt); - size_t iovSize = view.sum(); - if (0u == iovSize) { - return 0; - } - - if (offset >= size_ || offset + static_cast(iovSize) > size_) { - struct stat st; - auto ok = fstat(&st); - if (ok == 0 && st.st_size > size_) { - off_t last = align_down(size_, pageSize_); - if (last != size_) - cache_store_->evict(last, pageSize_); - size_ = st.st_size; - } - } - - if (offset >= size_) { - return 0; - } - - IOVector input(iov, iovcnt); - if (offset + static_cast(iovSize) > size_) { - input.extract_back(offset + static_cast(iovSize) - size_); - iovSize = size_ - offset; - } - -again: - auto tr = cache_store_->try_preadv(input.iovec(), input.iovcnt(), offset); - if (tr.refill_offset < 0) { - if (src_file_) { - ssize_t ret; - SCOPE_AUDIT("download", AU_FILEOP(get_pathname(), offset, ret)); - ret = src_file_->preadv(input.iovec(), input.iovcnt(), offset); - return ret; - } - - return -1; - } else if (tr.refill_size == 0 && tr.size >= 0) { - return tr.size; - } - - if (!src_file_) { - return -1; - } - - uint64_t refillOff = tr.refill_offset; - uint64_t refillSize = tr.refill_size; - if (refillOff + refillSize > static_cast(size_)) { - refillSize = size_ - 
refillOff; - } - - int ret = rangeLock_.try_lock_wait(refillOff, refillSize); - if (ret < 0) { - goto again; - } - - IOVector buffer(*allocator_); - { - DEFER(rangeLock_.unlock(refillOff, refillSize)); - auto alloc = buffer.push_back(refillSize); - if (alloc < refillSize) { - LOG_ERROR("memory allocate failed, refillSize:`, alloc:`", refillSize, alloc); - ssize_t ret; - SCOPE_AUDIT("download", AU_FILEOP(get_pathname(), offset, ret)); - ret = src_file_->preadv(input.iovec(), input.iovcnt(), offset); - return ret; - } - - ssize_t read; - { - SCOPE_AUDIT("download", AU_FILEOP(get_pathname(), offset, read)); - read = src_file_->preadv(buffer.iovec(), buffer.iovcnt(), refillOff); - } - - if (read != static_cast(refillSize)) { - LOG_ERRNO_RETURN( - 0, -1, - "src file read failed, read : `, expectRead : `, size_ : `, offset : `, sum : `", - read, refillSize, size_, refillOff, buffer.sum()); - } - - auto write = cache_store_->pwritev(buffer.iovec(), buffer.iovcnt(), refillOff); - - if (write != static_cast(refillSize)) { - if (ENOSPC != errno) - LOG_ERROR("cache file write failed : `, error : `, size_ : `, offset : `, sum : `", - write, ERRNO(errno), size_, refillOff, buffer.sum()); - ssize_t ret; - { - SCOPE_AUDIT("download", AU_FILEOP(get_pathname(), offset, ret)); - ret = src_file_->preadv(input.iovec(), input.iovcnt(), offset); - } - return ret; - } - } - - IOVector refillBuf(buffer.iovec(), buffer.iovcnt()); - int remain = iovSize; - int result = 0; - if (tr.refill_offset <= offset) { - auto inView = input.view(); - refillBuf.extract_front(offset - tr.refill_offset); - auto copy = refillBuf.memcpy_to(&inView, iovSize); - remain -= copy; - offset += copy; - result += copy; - } else if (tr.refill_offset + tr.refill_size >= offset + iovSize) { - iovector_view tailIov; - tailIov.iovcnt = 0; - input.slice(iovSize - (tr.refill_offset - offset), tr.refill_offset - offset, &tailIov); - auto copy = refillBuf.memcpy_to(&tailIov); - input.extract_back(copy); - result += copy; - 
remain -= copy; - } - - if (remain > 0) { - auto readRet = cache_store_->preadv(input.iovec(), input.iovcnt(), offset); - if (readRet < 0) { - SCOPE_AUDIT("download", AU_FILEOP(get_pathname(), offset, readRet)); - readRet = src_file_->preadv(input.iovec(), input.iovcnt(), offset); - if (readRet < 0) - LOG_ERRNO_RETURN(0, readRet, "read failed, ret:`, offset:`,sum:`,size_:`", readRet, - offset, input.sum(), size_); - } - - return result + readRet; - } - - return result; -} - -ssize_t CachedFile::pwrite(const void *buf, size_t count, off_t offset) { - struct iovec v { - const_cast(buf), count - }; - return pwritev(&v, 1, offset); -} - -ssize_t CachedFile::pwritev(const struct iovec *iov, int iovcnt, off_t offset) { - if (offset >= size_) { - return 0; - } - - iovector_view view(const_cast(iov), iovcnt); - size_t size = view.sum(); - - if (offset % pageSize_ != 0 || - (size % pageSize_ != 0 && offset + static_cast(size) < size_)) { - LOG_ERROR_RETURN(EINVAL, -1, "size or offset is not aligned to 4K, size : `, offset : `", - size, offset); - } - - if (offset + static_cast(size) <= size_) { - return cache_store_->pwritev(iov, iovcnt, offset); - } - - IOVector ioVector(iov, iovcnt); - if (offset + static_cast(size) > size_) { - auto ret = ioVector.extract_back(size - (size_ - offset)); - if (ret != size - (size_ - offset)) - LOG_ERRNO_RETURN(EINVAL, -1, "extract failed, extractSize : `, expected : ", ret, - size - (size_ - offset)) - } - - auto write = cache_store_->pwritev(ioVector.iovec(), ioVector.iovcnt(), offset); - if (write != static_cast(ioVector.sum())) { - if (ENOSPC != errno) - LOG_ERROR("cache file write failed : `, error : `, size_ : `, offset : `, sum : `", - write, ERRNO(errno), size_, offset, ioVector.sum()); - } - - return write; -} - -int CachedFile::fiemap(struct fiemap *map) { - errno = ENOSYS; - return -1; -} - -int CachedFile::query(off_t offset, size_t count) { - auto ret = cache_store_->queryRefillRange(offset, count); - return ret.second; -} - 
-int CachedFile::fallocate(int mode, off_t offset, off_t len) { - if (len == -1) { - len = size_ - offset; - } - range_split_power2 rs(offset, len, pageSize_); - auto aligned_offset = rs.aligned_begin_offset(); - auto aligned_len = rs.aligned_length(); - LOG_DEBUG("fallocate offset: `, len: `, aligned offset: `, aligned len: `", offset, len, - aligned_offset, aligned_len); - return cache_store_->evict(aligned_offset, aligned_len); -} - -int CachedFile::fstat(struct stat *buf) { - return src_file_ ? src_file_->fstat(buf) : -1; -} - -int CachedFile::close() { - if (src_file_) { - return src_file_->close(); - } - return 0; -} - -ssize_t CachedFile::read(void *buf, size_t count) { - struct iovec v { - buf, count - }; - return readv(&v, 1); -} - -ssize_t CachedFile::readv(const struct iovec *iov, int iovcnt) { - auto ret = preadv(iov, iovcnt, readOffset_); - if (ret > 0) { - readOffset_ += ret; - } - return ret; -} - -ssize_t CachedFile::write(const void *buf, size_t count) { - struct iovec v { - const_cast(buf), count - }; - return writev(&v, 1); -} - -ssize_t CachedFile::writev(const struct iovec *iov, int iovcnt) { - auto ret = pwritev(iov, iovcnt, writeOffset_); - if (ret > 0) { - writeOffset_ += ret; - } - return ret; -} - -std::string_view CachedFile::get_pathname() { - return get_store()->get_pathname(); -} - -ICachedFile *new_cached_file(IFile *src, ICacheStore *store, uint64_t pageSize, uint64_t refillUnit, - IOAlloc *allocator, IFileSystem *fs) { - // new_cached_file requires src is able to fstat - // once stat is failed, it will return nullptr - struct stat st = {}; - if (src) { - auto ok = src->fstat(&st); - if (-1 == ok) { - LOG_ERRNO_RETURN(0, nullptr, "src_file fstat failed : `", ok); - } - } - if (st.st_size > 0) { - store->ftruncate(st.st_size); - } - return new CachedFile(src, store, st.st_size, pageSize, refillUnit, allocator, fs); -} - -} // namespace Cache diff --git a/src/overlaybd/cache/frontend/cached_file.h 
b/src/overlaybd/cache/frontend/cached_file.h deleted file mode 100644 index 661dec54..00000000 --- a/src/overlaybd/cache/frontend/cached_file.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - Copyright The Overlaybd Authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -#pragma once - -#include -#include -#include -#include -#include -#include "../cache.h" -#include -#include - -struct IOAlloc; - -namespace FileSystem { -class ICacheStore; -} - -namespace Cache { -using namespace FileSystem; - -/* - * the procedures of pread are as follows: - * 1. check that the cache is hit(contain unaligned block). - * 2. if hit, just read from cache. - * 3. if not, merge all holes into one read request(offset, size), - * then read missing data from source of file and write it into cache, - * after that read cache' data into user's buffer. - */ - -class CachedFile : public ICachedFile { -public: - CachedFile(IFile *src_file, ICacheStore *cache_store, off_t size, uint64_t pageSize, - uint64_t refillUnit, IOAlloc *allocator, photon::fs::IFileSystem *fs); - ~CachedFile(); - - photon::fs::IFileSystem *filesystem(); - - ssize_t pread(void *buf, size_t count, off_t offset) override; - ssize_t preadv(const struct iovec *iov, int iovcnt, off_t offset) override; - - // pwrite* need to be aligned to 4KB for avoiding write padding. 
- ssize_t pwrite(const void *buf, size_t count, off_t offset) override; - ssize_t pwritev(const struct iovec *iov, int iovcnt, off_t offset) override; - - UNIMPLEMENTED(off_t lseek(off_t offset, int whence)); - UNIMPLEMENTED(int fsync()); - UNIMPLEMENTED(int fdatasync()); - UNIMPLEMENTED(int fchmod(mode_t mode)); - UNIMPLEMENTED(int fchown(uid_t owner, gid_t group)); - int fstat(struct stat *buf) override; - - int close(); - - ssize_t read(void *buf, size_t count) override; - ssize_t readv(const struct iovec *iov, int iovcnt) override; - ssize_t write(const void *buf, size_t count) override; - ssize_t writev(const struct iovec *iov, int iovcnt) override; - - int fiemap(struct photon::fs::fiemap *map) override; - - int query(off_t offset, size_t count) override; - - // offset and len must be aligned 4k, otherwise it's useless. - // !!! need ensure no other read operation, otherwise read may read hole data(zero). - int fallocate(int mode, off_t offset, off_t len) override; - - IFile *get_source() override { - return src_file_; - } - - // set the source file system, and enable `auto_refill` - int set_source(IFile *src) override { - src_file_ = src; - return 0; - } - - ICacheStore *get_store() override { - return cache_store_; - } - - int ftruncate(off_t length) override { - assert(!src_file_); - size_ = length; - return 0; - } - - std::string_view get_pathname(); - -private: - ssize_t prefetch(size_t count, off_t offset); - - ssize_t preadvInternal(const struct iovec *iov, int iovcnt, off_t offset); - - IFile *src_file_; // owned by current class - ICacheStore *cache_store_; // owned by current class - off_t size_; - size_t pageSize_; - size_t refillUnit_; - - RangeLock rangeLock_; - - IOAlloc *allocator_; - photon::fs::IFileSystem *fs_; - - off_t readOffset_; - off_t writeOffset_; -}; - -ICachedFile *new_cached_file(photon::fs::IFile *src, ICacheStore *store, uint64_t pageSize, - uint64_t refillUnit, IOAlloc *allocator, photon::fs::IFileSystem *fs); - -} // namespace 
Cache diff --git a/src/overlaybd/cache/frontend/cached_fs.cpp b/src/overlaybd/cache/frontend/cached_fs.cpp deleted file mode 100644 index 7b3af73a..00000000 --- a/src/overlaybd/cache/frontend/cached_fs.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/* - Copyright The Overlaybd Authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include -#include -#include -#include -#include "../cache.h" -#include "cached_file.h" -#include -#include - -namespace Cache { - -using namespace FileSystem; -using namespace photon::fs; - -class CachedFs : public ICachedFileSystem { -public: - CachedFs(IFileSystem *srcFs, ICachePool *fileCachePool, size_t pageSize, size_t refillUnit, - IOAlloc *allocator) - : srcFs_(srcFs), fileCachePool_(fileCachePool), pageSize_(pageSize), - refillUnit_(refillUnit), allocator_(allocator) { - } - - ~CachedFs() { - delete fileCachePool_; - } - - IFile *open(const char *pathname, int flags, mode_t mode) { - IFile *srcFile = nullptr; - if (srcFs_) { - srcFile = srcFs_->open(pathname, O_RDONLY); - if (!srcFile) - LOG_ERRNO_RETURN(0, nullptr, "Open source file failed"); - } - - auto cache_store = fileCachePool_->open(pathname, O_RDWR | O_CREAT, 0644); - if (nullptr == cache_store) { - delete srcFile; - LOG_ERRNO_RETURN(0, nullptr, "fileCachePool_ open file failed, name : `", pathname) - } - - auto ret = new_cached_file(srcFile, cache_store, pageSize_, refillUnit_, allocator_, this); - if (ret == nullptr) { // if create file is failed - // srcFile and cache_store must be 
release, or will leak - delete srcFile; - cache_store->release(); - } - return ret; - } - - IFile *open(const char *pathname, int flags) { - return open(pathname, flags, 0); // mode and flags are meaningless in RoCacheFS::open(2)(3) - } - - UNIMPLEMENTED_POINTER(IFile *creat(const char *pathname, mode_t mode)); - UNIMPLEMENTED(int mkdir(const char *pathname, mode_t mode)); - UNIMPLEMENTED(int rmdir(const char *pathname)); - UNIMPLEMENTED(int symlink(const char *oldname, const char *newname)); - - ssize_t readlink(const char *path, char *buf, size_t bufsiz) { - return srcFs_ ? srcFs_->readlink(path, buf, bufsiz) : -1; - } - - UNIMPLEMENTED(int link(const char *oldname, const char *newname)); - UNIMPLEMENTED(int rename(const char *oldname, const char *newname)); - UNIMPLEMENTED(int unlink(const char *filename)); - UNIMPLEMENTED(int chmod(const char *pathname, mode_t mode)); - UNIMPLEMENTED(int chown(const char *pathname, uid_t owner, gid_t group)); - UNIMPLEMENTED(int lchown(const char *pathname, uid_t owner, gid_t group)); - UNIMPLEMENTED(int utime(const char *path, const struct utimbuf *file_times) override); - UNIMPLEMENTED(int utimes(const char *path, const struct timeval times[2]) override); - UNIMPLEMENTED(int lutimes(const char *path, const struct timeval times[2]) override); - UNIMPLEMENTED(int mknod(const char *path, mode_t mode, dev_t dev) override); - - int statfs(const char *path, struct statfs *buf) { - return srcFs_ ? srcFs_->statfs(path, buf) : -1; - } - int statvfs(const char *path, struct statvfs *buf) { - return srcFs_ ? srcFs_->statvfs(path, buf) : -1; - } - int stat(const char *path, struct stat *buf) { - return srcFs_ ? srcFs_->stat(path, buf) : -1; - } - int lstat(const char *path, struct stat *buf) { - return srcFs_ ? srcFs_->stat(path, buf) : -1; - } - int access(const char *pathname, int mode) { - return srcFs_ ? 
srcFs_->access(pathname, mode) : -1; - } - - UNIMPLEMENTED(int truncate(const char *path, off_t length)); - UNIMPLEMENTED(int syncfs()); - - DIR *opendir(const char *name) override { - return srcFs_ ? srcFs_->opendir(name) : nullptr; - } - - IFileSystem *get_source() override { - return srcFs_; - } - - int set_source(IFileSystem *src) override { - srcFs_ = src; - return 0; - } - - ICachePool *get_pool() override { - return fileCachePool_; - } - -private: - IFileSystem *srcFs_; // owned by extern - ICachePool *fileCachePool_; // owned by current class - size_t pageSize_; - size_t refillUnit_; - - IOAlloc *allocator_; -}; - -} // namespace Cache - -namespace FileSystem { -using namespace photon::fs; -ICachedFileSystem *new_cached_fs(IFileSystem *src, ICachePool *pool, uint64_t pageSize, - uint64_t refillUnit, IOAlloc *allocator) { - if (!allocator) { - allocator = new IOAlloc; - } - return new ::Cache::CachedFs(src, pool, pageSize, refillUnit, allocator); -} -} // namespace FileSystem diff --git a/src/overlaybd/cache/full_file_cache/CMakeLists.txt b/src/overlaybd/cache/full_file_cache/CMakeLists.txt index e24124ef..8eacb764 100644 --- a/src/overlaybd/cache/full_file_cache/CMakeLists.txt +++ b/src/overlaybd/cache/full_file_cache/CMakeLists.txt @@ -4,6 +4,3 @@ add_library(full_file_cache_lib STATIC ${SRC_FULLFILECACHE}) target_include_directories(full_file_cache_lib PUBLIC ${PHOTON_INCLUDE_DIR} ) -if(BUILD_TESTING) - add_subdirectory(test) -endif() diff --git a/src/overlaybd/cache/full_file_cache/cache_pool.cpp b/src/overlaybd/cache/full_file_cache/cache_pool.cpp index a848b1cb..982bef04 100644 --- a/src/overlaybd/cache/full_file_cache/cache_pool.cpp +++ b/src/overlaybd/cache/full_file_cache/cache_pool.cpp @@ -20,23 +20,25 @@ #include #include #include -#include +#include "cache_store.h" #include +#include #include +#include #include -#include "cache_store.h" namespace Cache { using namespace FileSystem; +using namespace photon::fs; const uint64_t kGB = 1024 * 1024 * 
1024; const uint64_t kMaxFreeSpace = 50 * kGB; const int64_t kEvictionMark = 5ll * kGB; -FileCachePool::FileCachePool(photon::fs::IFileSystem *mediaFs, uint64_t capacityInGB, uint64_t periodInUs, - uint64_t diskAvailInBytes, uint64_t refillUnit, Fn_trans_func name_trans) - : mediaFs_(mediaFs), capacityInGB_(capacityInGB), periodInUs_(periodInUs), +FileCachePool::FileCachePool(IFileSystem *mediaFs, uint64_t capacityInGB, uint64_t periodInUs, + uint64_t diskAvailInBytes, uint64_t refillUnit) + : ICachePool(0), mediaFs_(mediaFs), capacityInGB_(capacityInGB), periodInUs_(periodInUs), diskAvailInBytes_(diskAvailInBytes), refillUnit_(refillUnit), totalUsed_(0), timer_(nullptr), running_(false), exit_(false), isFull_(false) { int64_t capacityInBytes = capacityInGB_ * kGB; @@ -44,9 +46,6 @@ FileCachePool::FileCachePool(photon::fs::IFileSystem *mediaFs, uint64_t capacity // keep this relation : waterMark < riskMark < capacity riskMark_ = std::max(capacityInBytes - kEvictionMark, (static_cast(waterMark_) + capacityInBytes) >> 1); - if (name_trans != nullptr) { - file_name_trans = name_trans; - } } FileCachePool::~FileCachePool() { @@ -57,26 +56,27 @@ FileCachePool::~FileCachePool() { } delete timer_; } + this->stores_clear(); delete mediaFs_; } void FileCachePool::Init() { traverseDir("/"); - timer_ = new photon::Timer(periodInUs_, {this, FileCachePool::timerHandler}); + timer_ = new photon::Timer(periodInUs_, {this, FileCachePool::timerHandler}, true, + 8UL * 1024 * 1024); } ICacheStore *FileCachePool::do_open(std::string_view pathname, int flags, mode_t mode) { - auto filename = file_name_trans(pathname); - auto localFile = openMedia(filename, flags, mode); + auto localFile = openMedia(pathname, flags, mode); if (!localFile) { return nullptr; } - auto find = fileIndex_.find(filename); + auto find = fileIndex_.find(pathname); if (find == fileIndex_.end()) { auto lruIter = lru_.push_front(fileIndex_.end()); std::unique_ptr entry(new LruEntry{lruIter, 1, 0}); - find = 
fileIndex_.emplace(filename, std::move(entry)).first; + find = fileIndex_.emplace(pathname, std::move(entry)).first; lru_.front() = find; } else { lru_.access(find->second->lruIter); @@ -86,12 +86,12 @@ ICacheStore *FileCachePool::do_open(std::string_view pathname, int flags, mode_t return new FileCacheStore(this, localFile, refillUnit_, find); } -photon::fs::IFile *FileCachePool::openMedia(std::string_view name, int flags, int mode) { - if (name.empty()) { +IFile *FileCachePool::openMedia(std::string_view name, int flags, int mode) { + if (name.empty() || name[0] != '/') { LOG_ERROR_RETURN(EINVAL, nullptr, "pathname is invalid, path : `", name); } - auto base_directory = photon::fs::Path(name.data()).dirname(); + auto base_directory = Path(name.data()).dirname(); auto ret = mkdir_recursive(base_directory, mediaFs_); if (ret) { LOG_ERRNO_RETURN(0, nullptr, "mkdir failed, path : `", name); @@ -105,6 +105,11 @@ photon::fs::IFile *FileCachePool::openMedia(std::string_view name, int flags, in return localFile; } +int FileCachePool::set_quota(std::string_view pathname, size_t quota) { + errno = ENOSYS; + return -1; +} + int FileCachePool::stat(CacheStat *stat, std::string_view pathname) { errno = ENOSYS; return -1; @@ -120,6 +125,11 @@ int FileCachePool::evict(size_t size) { return -1; } +int FileCachePool::rename(std::string_view oldname, std::string_view newname) { + errno = ENOSYS; + return -1; +} + bool FileCachePool::isFull() { return isFull_; } @@ -215,7 +225,7 @@ void FileCachePool::eviction() { if (0 == fileIter->second->openCount) { afterFtrucate(fileIter); } - photon::thread_usleep(kDeleteDelayInUs); + photon::thread_yield(); continue; } @@ -224,16 +234,21 @@ void FileCachePool::eviction() { err = mediaFs_->truncate(fileName.data(), 0); } - if (err && errno != ENOENT) { - LOG_ERROR("truncate(0) failed, name : `, ret : `, error code : `", fileName, err, - ERRNO()); - continue; - } else { - fileSize = lruEntry->size; - afterFtrucate(fileIter); - actualEvict -= 
fileSize; + if (err) { + ERRNO e; + LOG_ERROR("truncate(0) failed, name : `, ret : `, error code : `", fileName, err, e); + // truncate to 0 failed means unable to free the file, it should not consider as a part + // of cache. Deal as it already release. + // The only exception is errno EINTR, means truncate interrupted by signal, should try + // again + if (e.no == EINTR) { + photon::thread_yield(); + continue; + } } - photon::thread_usleep(kDeleteDelayInUs); + afterFtrucate(fileIter); + actualEvict -= fileSize; + photon::thread_yield(); } } @@ -251,19 +266,22 @@ bool FileCachePool::afterFtrucate(FileNameMap::iterator iter) { } if (0 == iter->second->openCount) { auto err = mediaFs_->unlink(iter->first.data()); - if (0 != err) { - LOG_ERROR("unlink failed, name : `, ret : `, error code : `", iter->first, err, - ERRNO()); - } else { - lru_.remove(iter->second->lruIter); - fileIndex_.erase(iter); + ERRNO e; + LOG_ERROR("unlink failed, name : `, ret : `, error code : `", iter->first, err, e); + // unlik failed may caused by multiple reasons + // only EBUSY should may be able to trying to unlink again + // other reason should never try to clean it. 
+ if (err && (e.no == EBUSY)) { + return false; } + lru_.remove(iter->second->lruIter); + fileIndex_.erase(iter); } return true; } int FileCachePool::traverseDir(const std::string &root) { - for (auto file : enumerable(photon::fs::Walker(mediaFs_, root))) { + for (auto file : enumerable(Walker(mediaFs_, root))) { insertFile(file); } return 0; diff --git a/src/overlaybd/cache/full_file_cache/cache_pool.h b/src/overlaybd/cache/full_file_cache/cache_pool.h index 81d1de0c..1f17be7a 100644 --- a/src/overlaybd/cache/full_file_cache/cache_pool.h +++ b/src/overlaybd/cache/full_file_cache/cache_pool.h @@ -27,20 +27,14 @@ #include "../policy/lru.h" #include "../pool_store.h" -namespace photon { - namespace fs { - class IFileSystem; - class IFile; - }; -}; // photon +#include namespace Cache { - class FileCachePool : public FileSystem::ICachePool { public: FileCachePool(photon::fs::IFileSystem *mediaFs, uint64_t capacityInGB, uint64_t periodInUs, - uint64_t diskAvailInBytes, uint64_t refillUnit, Fn_trans_func name_trans = nullptr); + uint64_t diskAvailInBytes, uint64_t refillUnit); ~FileCachePool(); static const uint64_t kDiskBlockSize = 512; // stat(2) @@ -52,10 +46,13 @@ class FileCachePool : public FileSystem::ICachePool { // pathname must begin with '/' FileSystem::ICacheStore *do_open(std::string_view pathname, int flags, mode_t mode) override; - int stat(FileSystem::CacheStat *stat, std::string_view pathname = std::string_view(nullptr, 0)) override; + int set_quota(std::string_view pathname, size_t quota) override; + int stat(FileSystem::CacheStat *stat, + std::string_view pathname = std::string_view(nullptr, 0)) override; int evict(std::string_view filename) override; int evict(size_t size = 0) override; + int rename(std::string_view oldname, std::string_view newname) override; struct LruEntry { LruEntry(uint32_t lruIt, int openCnt, uint64_t fileSize) @@ -111,7 +108,6 @@ class FileCachePool : public FileSystem::ICachePool { LRUContainer lru_; // filename -> lruEntry 
FileNameMap fileIndex_; - Fn_trans_func file_name_trans = &same_name_trans; }; -} // namespace Cache \ No newline at end of file +} // namespace Cache diff --git a/src/overlaybd/cache/full_file_cache/cache_store.cpp b/src/overlaybd/cache/full_file_cache/cache_store.cpp index 9fb8d46a..1b46de05 100644 --- a/src/overlaybd/cache/full_file_cache/cache_store.cpp +++ b/src/overlaybd/cache/full_file_cache/cache_store.cpp @@ -18,20 +18,23 @@ #include "sys/statvfs.h" #include #include -#include #include -#include +#include #include #include +#include #include "cache_pool.h" +using namespace FileSystem; +using namespace photon::fs; + namespace Cache { const uint64_t kDiskBlockSize = 512; // stat(2) constexpr int kFieExtentSize = 1000; const int kBlockSize = 4 * 1024; -FileCacheStore::FileCacheStore(FileSystem::ICachePool *cachePool, photon::fs::IFile *localFile, +FileCacheStore::FileCacheStore(FileSystem::ICachePool *cachePool, IFile *localFile, size_t refillUnit, FileIterator iterator) : cachePool_(static_cast(cachePool)), localFile_(localFile), refillUnit_(refillUnit), iterator_(iterator) { @@ -42,12 +45,14 @@ FileCacheStore::~FileCacheStore() { cachePool_->removeOpenFile(iterator_); } -ssize_t FileCacheStore::preadv(const struct iovec *iov, int iovcnt, off_t offset) { +ssize_t FileCacheStore::do_preadv2(const struct iovec *iov, int iovcnt, off_t offset, int flags) { + // TODO(suoshi.yf): maybe a new interface for updating lru is better for avoiding + // multiple cacheStore preadvs but cacheFile preadv only once ssize_t ret; cachePool_->updateLru(iterator_); auto lruEntry = static_cast(iterator_->second.get()); photon::scoped_rwlock rl(lruEntry->rw_lock_, photon::RLOCK); - SCOPE_AUDIT_THRESHOLD(10UL * 1000, "file:read", AU_FILEOP("", offset, ret)); + SCOPE_AUDIT_THRESHOLD(1UL * 1000, "file:read", AU_FILEOP("", offset, ret)); ret = localFile_->preadv(iov, iovcnt, offset); return ret; } @@ -61,7 +66,7 @@ ssize_t FileCacheStore::do_pwritev(const struct iovec *iov, int iovcnt, 
off_t of return ret; } -ssize_t FileCacheStore::pwritev(const struct iovec *iov, int iovcnt, off_t offset) { +ssize_t FileCacheStore::do_pwritev2(const struct iovec *iov, int iovcnt, off_t offset, int flags) { if (cacheIsFull()) { errno = ENOSPC; return -1; @@ -89,7 +94,7 @@ std::pair FileCacheStore::queryRefillRange(off_t offset, size_t s off_t alignLeft = align_down(offset, kBlockSize); off_t alignRight = align_up(offset + size, kBlockSize); ReadRequest request{alignLeft, static_cast(alignRight - alignLeft)}; - struct photon::fs::fiemap_t fie(request.offset, request.size); + struct fiemap_t fie(request.offset, request.size); fie.fm_mapped_extents = 0; if (request.size > 0) { // fiemap cannot handle size zero. @@ -112,7 +117,7 @@ std::pair FileCacheStore::queryRefillRange(off_t offset, size_t s uint64_t holeStart = request.offset; uint64_t holeEnd = request.offset + request.size; - for (auto i = fie.fm_mapped_extents - 1; i < fie.fm_mapped_extents; i--) { + for (ssize_t i = (ssize_t)(fie.fm_mapped_extents) - 1; i >= 0; i--) { auto &extent = fie.fm_extents[i]; if ((extent.fe_flags == FIEMAP_EXTENT_UNKNOWN) || (extent.fe_flags == FIEMAP_EXTENT_UNWRITTEN)) @@ -146,7 +151,12 @@ std::pair FileCacheStore::queryRefillRange(off_t offset, size_t s return std::make_pair(left, right - left); } -int FileCacheStore::stat(FileSystem::CacheStat *stat) { +int FileCacheStore::set_quota(size_t quota) { + errno = ENOSYS; + return -1; +} + +int FileCacheStore::stat(CacheStat *stat) { errno = ENOSYS; return -1; } @@ -161,7 +171,6 @@ int FileCacheStore::evict(off_t offset, size_t count) { #ifndef FALLOC_FL_PUNCH_HOLE #define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */ #endif - ScopedRangeLock lock(rangeLock_, offset, count); int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; return localFile_->fallocate(mode, offset, count); } @@ -175,8 +184,4 @@ bool FileCacheStore::cacheIsFull() { return cachePool_->isFull(); } -int FileCacheStore::ftruncate(off_t length) { - return 
localFile_->ftruncate(length); -} - } // namespace Cache diff --git a/src/overlaybd/cache/full_file_cache/cache_store.h b/src/overlaybd/cache/full_file_cache/cache_store.h index 86faf917..ae1ed5a4 100644 --- a/src/overlaybd/cache/full_file_cache/cache_store.h +++ b/src/overlaybd/cache/full_file_cache/cache_store.h @@ -20,13 +20,6 @@ #include #include "cache_pool.h" -namespace photon { - namespace fs { - class IFileSystem; - struct fiemap; - } -} // namespace FileSystem - namespace Cache { class FileCachePool; @@ -34,17 +27,17 @@ class FileCachePool; class FileCacheStore : public FileSystem::ICacheStore { public: typedef FileCachePool::FileNameMap::iterator FileIterator; - FileCacheStore(FileSystem::ICachePool *cachePool, photon::fs::IFile *localFile, size_t refillUnit, - FileIterator iterator); + FileCacheStore(FileSystem::ICachePool *cachePool, photon::fs::IFile *localFile, + size_t refillUnit, FileIterator iterator); ~FileCacheStore(); - ssize_t preadv(const struct iovec *iov, int iovcnt, off_t offset) override; + ssize_t do_preadv2(const struct iovec *iov, int iovcnt, off_t offset, int flags) override; - ssize_t pwritev(const struct iovec *iov, int iovcnt, off_t offset) override; + ssize_t do_pwritev2(const struct iovec *iov, int iovcnt, off_t offset, int flags) override; + int set_quota(size_t quota) override; int stat(FileSystem::CacheStat *stat) override; int evict(off_t offset, size_t count = -1) override; - int ftruncate(off_t length) override; std::pair queryRefillRange(off_t offset, size_t size) override; @@ -65,10 +58,10 @@ class FileCacheStore : public FileSystem::ICacheStore { std::pair getLastMergedExtents(struct fiemap *fie); std::pair getHoleFromCacheHitResult(off_t offset, size_t alignSize, - struct photon::fs::fiemap *fie); + struct fiemap *fie); - FileCachePool *cachePool_; // owned by extern class - photon::fs::IFile *localFile_; // owned by current class + FileCachePool *cachePool_; // owned by extern class + photon::fs::IFile *localFile_; // 
owned by current class size_t refillUnit_; FileIterator iterator_; RangeLock rangeLock_; diff --git a/src/overlaybd/cache/full_file_cache/test/cache_test.cpp b/src/overlaybd/cache/full_file_cache/test/cache_test.cpp deleted file mode 100644 index bf9840f6..00000000 --- a/src/overlaybd/cache/full_file_cache/test/cache_test.cpp +++ /dev/null @@ -1,388 +0,0 @@ -/* - Copyright The Overlaybd Authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "../../cache.h" -#include "random_generator.h" - -namespace Cache { - -using namespace FileSystem; -using namespace photon::fs; - -// Cleanup and recreate the test dir -inline void SetupTestDir(const std::string &dir) { - std::string cmd = std::string("rm -r ") + dir; - system(cmd.c_str()); - cmd = std::string("mkdir -p ") + dir; - system(cmd.c_str()); -} - -void commonTest(bool cacheIsFull, bool enableDirControl, bool dirFull) { - std::string prefix = ""; - const size_t dirQuota = 32ul * 1024 * 1024; - const uint64_t refillSize = 1024 * 1024; - - std::string root("/tmp/obdcache/cache_test/"); - SetupTestDir(root); - - std::string subDir = prefix + "dir/dir/"; - SetupTestDir(root + subDir); - std::system(std::string("touch " + root + subDir + "testFile").c_str()); - - struct stat st; - auto ok = ::stat(std::string(root + subDir + "testFile").c_str(), &st); - EXPECT_EQ(0, 
ok); - - std::string srcRoot("/tmp/obdcache/src_test/"); - SetupTestDir(srcRoot); - auto srcFs = new_localfs_adaptor(srcRoot.c_str(), ioengine_psync); - - auto mediaFs = new_localfs_adaptor(root.c_str(), ioengine_libaio); - auto alignFs = new_aligned_fs_adaptor(mediaFs, 4 * 1024, true, true); - auto cacheAllocator = new AlignedAlloc(4 * 1024); - auto roCachedFs = new_full_file_cached_fs(srcFs, alignFs, refillSize, cacheIsFull ? 0 : 512, - 1000 * 1000 * 1, 128ul * 1024 * 1024, cacheAllocator); - auto cachePool = roCachedFs->get_pool(); - - SetupTestDir(srcRoot + prefix + "testDir"); - auto srcFile = srcFs->open(std::string(prefix + "/testDir/file_1").c_str(), - O_RDWR | O_CREAT | O_TRUNC, 0644); - - UniformCharRandomGen gen(0, 255); - off_t offset = 0; - uint32_t kPageSize = 4 * 1024; - uint32_t kFileSize = kPageSize * 16384; // 64MB - uint32_t kPageCount = kFileSize / kPageSize; - for (uint32_t i = 0; i < kPageCount; ++i) { - std::vector data; - for (uint32_t j = 0; j < kPageSize; ++j) { - data.push_back(gen.next()); - } - srcFile->pwrite(data.data(), data.size(), offset); - offset += kPageSize; - } - - // write some unaligned - off_t lastOffset = offset; - off_t unAlignedLen = 750; - { - std::vector data; - for (uint32_t j = 0; j < kPageSize; ++j) { - data.push_back(gen.next()); - } - srcFile->pwrite(data.data(), unAlignedLen, offset); - } - - auto cachedFile = static_cast( - roCachedFs->open(std::string(prefix + "/testDir/file_1").c_str(), 0, 0644)); - - // test unaligned block - { - void *buf = malloc(kPageSize); - auto ret = cachedFile->pread(buf, kPageSize, lastOffset); - - std::vector src; - src.reserve(kPageSize); - auto retSrc = srcFile->pread(src.data(), kPageSize, lastOffset); - - EXPECT_EQ(0, std::memcmp(buf, src.data(), unAlignedLen)); - EXPECT_EQ(unAlignedLen, retSrc); - EXPECT_EQ(unAlignedLen, ret); - - LOG_INFO("read again"); - - // read again - ret = cachedFile->pread(buf, kPageSize, lastOffset); - EXPECT_EQ(unAlignedLen, ret); - - free(buf); - } - 
- // test aligned and unaligned block - { - void *buf = malloc(kPageSize * 4); - auto ret = cachedFile->pread(buf, kPageSize * 4, lastOffset - 2 * kPageSize); - - std::vector src; - src.reserve(kPageSize * 4); - auto retSrc = srcFile->pread(src.data(), kPageSize * 4, lastOffset - 2 * kPageSize); - - EXPECT_EQ(0, std::memcmp(buf, src.data(), 2 * kPageSize + unAlignedLen)); - EXPECT_EQ(2 * kPageSize + unAlignedLen, retSrc); - EXPECT_EQ(2 * kPageSize + unAlignedLen, ret); - - LOG_INFO("read again"); - - // read again - ret = cachedFile->pread(buf, kPageSize * 4, lastOffset - 2 * kPageSize); - EXPECT_EQ(2 * kPageSize + unAlignedLen, ret); - - free(buf); - } - - std::vector readBuf; - readBuf.reserve(kPageSize); - std::vector readSrcBuf; - readSrcBuf.reserve(kPageSize); - for (int i = 0; i != 5; ++i) { - EXPECT_EQ(kPageSize, cachedFile->read(readBuf.data(), kPageSize)); - srcFile->read(readSrcBuf.data(), kPageSize); - EXPECT_EQ(0, std::memcmp(readBuf.data(), readSrcBuf.data(), kPageSize)); - } - - // test refill(3) - if (!cacheIsFull) { - auto inSrcFile = cachedFile->get_source(); - cachedFile->set_source(nullptr); - struct stat stat; - inSrcFile->fstat(&stat); - cachedFile->ftruncate(stat.st_size); - void *buf = malloc(kPageSize * 3); - DEFER(free(buf)); - std::vector src; - src.reserve(kPageSize * 3); - EXPECT_EQ(kPageSize, srcFile->pread(src.data(), kPageSize, 0)); - memcpy(buf, src.data(), kPageSize); - - EXPECT_EQ(kPageSize, cachedFile->refill(buf, kPageSize, 0)); - - memset(buf, 0, kPageSize); - EXPECT_EQ(kPageSize, cachedFile->pread(buf, kPageSize, 0)); - EXPECT_EQ(0, memcmp(buf, src.data(), kPageSize)); - - struct stat st1; - ::stat(std::string(root + prefix + "/testDir/file_1").c_str(), &st1); - EXPECT_EQ(0, cachedFile->evict(0, kPageSize)); - struct stat st2; - ::stat(std::string(root + prefix + "/testDir/file_1").c_str(), &st2); - EXPECT_EQ(kPageSize, st1.st_blocks * 512 - st2.st_blocks * 512); - - // test refill last block - src.clear(); - 
EXPECT_EQ(kPageSize + unAlignedLen, - srcFile->pread(src.data(), kPageSize * 3, lastOffset - kPageSize)); - memcpy(buf, src.data(), kPageSize * 3); - EXPECT_EQ(kPageSize + unAlignedLen, - cachedFile->refill(buf, kPageSize * 3, lastOffset - kPageSize)); - memset(buf, 0, kPageSize * 3); - EXPECT_EQ(kPageSize + unAlignedLen, - cachedFile->pread(buf, kPageSize * 3, lastOffset - kPageSize)); - EXPECT_EQ(0, memcmp(buf, src.data(), kPageSize + unAlignedLen)); - - cachedFile->set_source(inSrcFile); - } - - // test refill(2) - if (!cacheIsFull) { - auto inSrcFile = cachedFile->get_source(); - - void *buf = malloc(kPageSize * 2); - DEFER(free(buf)); - EXPECT_EQ(2 * kPageSize, cachedFile->refill(kPageSize, 2 * kPageSize)); - - cachedFile->set_source(nullptr); - EXPECT_EQ(2 * kPageSize, cachedFile->pread(buf, 2 * kPageSize, kPageSize)); - std::vector src; - src.reserve(kPageSize * 2); - EXPECT_EQ(kPageSize * 2, srcFile->pread(src.data(), 2 * kPageSize, kPageSize)); - EXPECT_EQ(0, memcmp(buf, src.data(), 2 * kPageSize)); - cachedFile->set_source(inSrcFile); - - // prefetch more than 16MB - EXPECT_EQ(5000 * kPageSize + kPageSize, cachedFile->prefetch(234, 5000 * kPageSize)); - // prefetch tail - EXPECT_EQ(kPageSize + unAlignedLen, - cachedFile->prefetch(lastOffset - kPageSize, 5000 * kPageSize)); - } - - if (dirFull) { - CacheStat cstat = {}; - EXPECT_EQ(0, cachePool->stat(&cstat, prefix)); - EXPECT_EQ(dirQuota / refillSize, cstat.total_size); - } - - // test aligned section - UniformInt32RandomGen genOffset(0, (kPageCount + 1) * kPageSize); - UniformInt32RandomGen genSize(0, 8 * kPageSize); - struct stat srcSt = {}; - srcFile->fstat(&srcSt); - for (int i = 0; i != 10000; ++i) { - auto tmpOffset = genOffset.next(); - auto size = genSize.next(); - - if (tmpOffset >= srcSt.st_size) { - size = 0; - } else { - size = tmpOffset + size > srcSt.st_size ? 
srcSt.st_size - tmpOffset : size; - } - void *buf = malloc(size); - auto ret = cachedFile->pread(buf, size, tmpOffset); - - std::vector src; - src.reserve(size); - auto retSrc = srcFile->pread(src.data(), size, tmpOffset); - - EXPECT_EQ(0, std::memcmp(buf, src.data(), size)); - EXPECT_EQ(size, retSrc); - EXPECT_EQ(size, ret); - free(buf); - } - srcFile->close(); - - photon::thread_usleep(1000 * 1000ull); - ok = ::stat(std::string(root + subDir + "testFile").c_str(), &st); - EXPECT_EQ(cacheIsFull || dirFull ? -1 : 0, ok); - - delete cachedFile; - - // test smaller file - { - auto smallFile = srcFs->open(std::string(prefix + "/testDir/small").c_str(), - O_RDWR | O_CREAT | O_TRUNC, 0644); - DEFER(delete smallFile); - int smallSize = 102; - std::vector smallData; - for (int i = 0; i != smallSize; ++i) { - smallData.push_back(gen.next()); - } - EXPECT_EQ(smallSize, smallFile->pwrite(smallData.data(), smallData.size(), 0)); - - auto smallCache = static_cast( - roCachedFs->open(std::string(prefix + "/testDir/small").c_str(), 0, 0644)); - DEFER(delete smallCache); - - void *sBuffer = malloc(kPageSize); - DEFER(free(sBuffer)); - EXPECT_EQ(smallSize, smallCache->pread(sBuffer, kPageSize, 0)); - EXPECT_EQ(0, std::memcmp(sBuffer, smallData.data(), smallSize)); - - memset(sBuffer, 0, kPageSize); - EXPECT_EQ(smallSize, smallCache->pread(sBuffer, kPageSize, 0)); - EXPECT_EQ(0, std::memcmp(sBuffer, smallData.data(), smallSize)); - - smallFile->close(); - } - - // test refill - { - auto refillFile = srcFs->open(std::string(prefix + "/testDir/refill").c_str(), - O_RDWR | O_CREAT | O_TRUNC, 0644); - DEFER(delete refillFile); - int refillSize = 4097; - std::vector refillData; - for (int i = 0; i != refillSize; ++i) { - refillData.push_back(gen.next()); - } - EXPECT_EQ(refillSize, refillFile->pwrite(refillData.data(), refillData.size(), 0)); - - auto refillCache = static_cast( - roCachedFs->open(std::string(prefix + "/testDir/refill").c_str(), 0, 0644)); - DEFER(delete refillCache); - 
- void *sBuffer = malloc(kPageSize * 2); - DEFER(free(sBuffer)); - memset(sBuffer, 0, kPageSize * 2); - EXPECT_EQ(kPageSize, refillCache->pread(sBuffer, kPageSize, 0)); - EXPECT_EQ(0, std::memcmp(sBuffer, refillData.data(), kPageSize)); - - memset(sBuffer, 0, kPageSize * 2); - EXPECT_EQ(refillSize, refillCache->pread(sBuffer, kPageSize * 2, 0)); - EXPECT_EQ(0, std::memcmp(sBuffer, refillData.data(), refillSize)); - - refillFile->close(); - } - - delete srcFs; - delete roCachedFs; -} - -TEST(RoCachedFs, Basic) { - commonTest(false, false, false); -} - -TEST(RoCachedFs, BasicCacheFull) { - commonTest(true, false, false); -} - -TEST(RoCachedFs, CacheWithOutSrcFile) { - std::string root("/tmp/obdcache/cache_test_no_src/"); - SetupTestDir(root); - - auto mediaFs = new_localfs_adaptor(root.c_str(), ioengine_libaio); - auto alignFs = new_aligned_fs_adaptor(mediaFs, 4 * 1024, true, true); - auto cacheAllocator = new AlignedAlloc(4 * 1024); - DEFER(delete cacheAllocator); - auto roCachedFs = new_full_file_cached_fs(nullptr, alignFs, 1024 * 1024, 512, 1000 * 1000 * 1, - 128ul * 1024 * 1024, cacheAllocator); - DEFER(delete roCachedFs); - auto cachedFile = static_cast( - roCachedFs->open(std::string("/testDir/file_1").c_str(), 0, 0644)); - DEFER(delete cachedFile); - - cachedFile->ftruncate(1024 * 1024); - std::vector buf; - int len = 8 * 1024; - buf.reserve(len); - EXPECT_EQ(len, cachedFile->pwrite(buf.data(), len, 4 * 1024)); - EXPECT_EQ(len / 2, cachedFile->pread(buf.data(), 4 * 1024, 4 * 1024)); - EXPECT_EQ(-1, cachedFile->pread(buf.data(), len, 0)); - - auto writeFile = static_cast( - roCachedFs->open(std::string("/testDir/file_2").c_str(), 0, 0644)); - DEFER(delete writeFile); - writeFile->ftruncate(1024 * 1024); - buf.assign(len, 'a'); - EXPECT_EQ(len, writeFile->write(buf.data(), len)); - EXPECT_EQ(len, writeFile->write(buf.data(), len)); - std::vector res; - res.reserve(len); - EXPECT_EQ(len, writeFile->pread(res.data(), len, 0)); - EXPECT_EQ(0, 
std::memcmp(buf.data(), res.data(), len)); - res.assign(len, '0'); - EXPECT_EQ(len, writeFile->pread(res.data(), len, len)); - EXPECT_EQ(0, std::memcmp(buf.data(), res.data(), len)); - EXPECT_EQ(-1, writeFile->pread(res.data(), len, len * 2)); -} - -} // namespace Cache - -int main(int argc, char **argv) { - log_output_level = 0; - ::testing::InitGoogleTest(&argc, argv); - - photon::init(photon::INIT_EVENT_DEFAULT, photon::INIT_IO_DEFAULT); - int ret = RUN_ALL_TESTS(); - return ret; -} diff --git a/src/overlaybd/cache/gzip_cache/cached_fs.cpp b/src/overlaybd/cache/gzip_cache/cached_fs.cpp index 4b09e10e..07181c3f 100644 --- a/src/overlaybd/cache/gzip_cache/cached_fs.cpp +++ b/src/overlaybd/cache/gzip_cache/cached_fs.cpp @@ -15,7 +15,7 @@ */ #include "cached_fs.h" #include "../full_file_cache/cache_pool.h" -#include "../frontend/cached_file.h" +#include "../cache.h" namespace Cache { @@ -39,7 +39,10 @@ class GzipCachedFsImpl : public GzipCachedFs { delete file; LOG_ERRNO_RETURN(0, nullptr, "file cache pool open file failed, name : `", file_name); } - auto ret = Cache::new_cached_file(file, cache_store, page_size_, refill_unit_, io_alloc_, nullptr); + cache_store->set_src_file(file); + cache_store->set_allocator(io_alloc_); + cache_store->set_page_size(page_size_); + auto ret = FileSystem::new_cached_file(cache_store, page_size_, nullptr); if (ret == nullptr) { // if create file is failed // file and cache_store must be release, or will leak delete file; diff --git a/src/overlaybd/cache/ocf_cache/test/ocf_perf_test.cpp b/src/overlaybd/cache/ocf_cache/test/ocf_perf_test.cpp index e3a6fb8f..18b4a082 100644 --- a/src/overlaybd/cache/ocf_cache/test/ocf_perf_test.cpp +++ b/src/overlaybd/cache/ocf_cache/test/ocf_perf_test.cpp @@ -224,7 +224,7 @@ static int single_file_file_cache(IOAlloc *io_alloc, photon::fs::IFileSystem *sr } auto cached_fs = FileSystem::new_full_file_cached_fs( src_fs, media_fs, FLAGS_page_size, FLAGS_media_file_size_gb, 1000 * 1000, - 2UL * 
FLAGS_media_file_size_gb * 1024 * 1024 * 1024, io_alloc); + 2UL * FLAGS_media_file_size_gb * 1024 * 1024 * 1024, io_alloc, 0); if (cached_fs == nullptr) { LOG_ERROR_RETURN(0, -1, "new_ocf_cached_fs error"); } diff --git a/src/overlaybd/cache/policy/lru.h b/src/overlaybd/cache/policy/lru.h index 5210bff3..48af6a40 100644 --- a/src/overlaybd/cache/policy/lru.h +++ b/src/overlaybd/cache/policy/lru.h @@ -1,3 +1,4 @@ + /* Copyright The Overlaybd Authors @@ -13,15 +14,14 @@ See the License for the specific language governing permissions and limitations under the License. */ - #pragma once +#include #include #include -#include -#include #include -#include #include +#include +#include namespace FileSystem { // This is a generic LRU container, highly optimized for both speed and memory. diff --git a/src/overlaybd/cache/pool_store.h b/src/overlaybd/cache/pool_store.h index 8e6d028d..3aea97e8 100644 --- a/src/overlaybd/cache/pool_store.h +++ b/src/overlaybd/cache/pool_store.h @@ -14,33 +14,76 @@ limitations under the License. 
*/ #pragma once +#include #include #include #include #include #include +#include #include -#include #include +#include +#include +#include + +enum ListType : int { + LIST_ALL = 0, + LIST_FILES = 1, + LIST_DIRS = 2, +}; -struct iovector; +// reset cache flags +enum ResetType : int { + RST_ALL = 0x0, // reset all cache's data, include file meta + RST_MEMORY = 0x1, // reset memory cache's data + RST_DISK = 0x2, // reset disk cache's data + RST_UPPER = 0x10, // reset upper layer cache's data + RST_LOWER = 0x20, // reset lower layer cache's data +}; -typedef std::string (*Fn_trans_func)(std::string_view name); +// resize cache flags +enum ResizeType : int { + RSZ_MEMORY = 0x1, // resize memory cache's capacity + RSZ_DISK = 0x2, // resize disk cache's capacity + RSZ_UPPER = 0x10, // resize upper layer cache's capacity + RSZ_LOWER = 0x20, // resize lower layer cache's capacity +}; namespace FileSystem { +// `CacheFnTransFunc` use to transform the filename in the cached store. +// `std::string_view` is the filename before transformation (as src_name). +// `char *` is the transformed filename (as store_key). +// `size_t` is the max buffer length of store_key. +// If transform occurs an error (such as result length more than buffer size) +// or there is not necessary to transform, this function returns 0, +// otherwise, it returns string length after transformation. 
+using CacheFnTransFunc = Delegate; class ICacheStore; struct CacheStat { uint32_t struct_size = sizeof(CacheStat); - uint32_t refill_unit; // in bytes - uint32_t total_size; // in refill_unit - uint32_t used_size; // in refill_unit + uint32_t refill_unit; // in bytes + uint32_t total_size; // in refill_unit + uint32_t used_size; // in refill_unit + uint64_t evict_other; // in bytes, initialized to -1UL means reset + uint64_t evict_global; // in bytes, initialized to -1UL means reset + uint64_t evict_user; // in bytes, initialized to -1UL means reset }; class ICachePool : public Object { public: - virtual ICacheStore *open(std::string_view filename, int flags, mode_t mode); + ICachePool(uint32_t pool_size = 128, uint32_t max_refilling = 128, + uint32_t refilling_threshold = -1U); + ~ICachePool(); + + ICacheStore *open(std::string_view filename, int flags, mode_t mode); + + // set quota to a dir or a file + virtual int set_quota(std::string_view pathname, size_t quota) = 0; // if pathname is {nullptr, 0} or "/", returns the overall stat + // if pathname is a dir, and it has quota set, returns its quota usage + // if pathname is a file, returns the file's stat virtual int stat(CacheStat *stat, std::string_view pathname = std::string_view(nullptr, 0)) = 0; // force to evict specified files(s) @@ -52,120 +95,196 @@ class ICachePool : public Object { int store_release(ICacheStore *store); + void stores_clear(); + + void set_trans_func(CacheFnTransFunc fn_trans_func); + virtual ICacheStore *do_open(std::string_view filename, int flags, mode_t mode) = 0; - ICacheStore *find_store_map(std::string_view pathname); + virtual int rename(std::string_view oldname, std::string_view newname) = 0; - static std::string same_name_trans(std::string_view filename) { return std::string(filename); } -protected: - unordered_map_string_key m_stores; -}; + virtual ssize_t list(const char *dirname, ListType type, const struct iovec *iov, int iovcnt, + const char *marker, uint32_t count) { + 
errno = ENOSYS; + return -1; + } -class ICacheStore : public Object { -public: - struct try_preadv_result { - size_t iov_sum; // sum of the iovec[] - size_t refill_size; // size in bytes to refill, 0 means cache hit - union { - off_t refill_offset; // the offset to fill, if not hit - ssize_t size; // the return value of preadv(), if hit - }; - }; + UNIMPLEMENTED_POINTER(void *get_underlay_object(int i = 0)); - // either override try_preadv() or try_preadv_mutable() - virtual try_preadv_result try_preadv(const struct iovec *iov, int iovcnt, off_t offset); - virtual try_preadv_result try_preadv_mutable(struct iovec *iov, int iovcnt, off_t offset); + // reset cache's data + virtual int reset(int flags = 0) { + errno = ENOSYS; + return -1; + } - // either override preadv() or preadv_mutable() - virtual ssize_t preadv(const struct iovec *iov, int iovcnt, off_t offset); - virtual ssize_t preadv_mutable(struct iovec *iov, int iovcnt, off_t offset); + // resize cache's capacity + virtual int resize(size_t n, int flags = 0) { + errno = ENOSYS; + return -1; + } - // either override pwritev() or pwritev_mutable() - virtual ssize_t pwritev(const struct iovec *iov, int iovcnt, off_t offset); - virtual ssize_t pwritev_mutable(struct iovec *iov, int iovcnt, off_t offset); +protected: + void *m_stores; + CacheFnTransFunc fn_trans_func; + void *m_thread_pool = nullptr; + void *m_vcpu = nullptr; // vcpu where m_therad_pool is created + std::atomic m_refilling{0}; + const uint32_t m_max_refilling = 128; + const uint32_t m_refilling_threshold = -1U; + friend class ICacheStore; +}; +class ICacheStore : public Object { +public: + virtual ~ICacheStore(); + // public interface for reading cache file store, dealing with cache-miss + // and deduplication of concurrent reading of source file. 
+ ssize_t preadv2(const struct iovec *iov, int iovcnt, off_t offset, int flags); + ssize_t pwritev2(const struct iovec *iov, int iovcnt, off_t offset, int flags); + ssize_t try_refill_range(off_t offset, size_t count); + + virtual int set_quota(size_t quota) = 0; virtual int stat(CacheStat *stat) = 0; virtual int evict(off_t offset, size_t count = -1) = 0; - virtual int ftruncate(off_t length) = 0; - - void release() { - ref_count--; - if (ref_count == 0) { - pool_->store_release(this); - try_destruct(); - } + // offset + size must <= origin file size + virtual std::pair queryRefillRange(off_t offset, size_t size) = 0; + virtual int fstat(struct stat *buf) = 0; + virtual int set_crc(uint32_t crc) { + errno = ENOSYS; + return -1; } - void add_ref() { - ref_count++; + virtual int get_crc(uint32_t *crc) { + errno = ENOSYS; + return -1; } - - uint32_t get_ref_count() { - return ref_count; + virtual uint64_t get_handle() { + return -1UL; } - bool try_destruct() { - if (ref_count == 0) { - delete this; - return true; - } - return false; + void release() { + auto ref = ref_.fetch_sub(1, std::memory_order_relaxed); + if (ref == 1 && pool_) { + pool_->store_release(this); + } else if (ref == 0) + delete this; // call do_open directly } ssize_t pread(void *buf, size_t count, off_t offset) { struct iovec iov { buf, count }; - return preadv_mutable(&iov, 1, offset); + return do_preadv2(&iov, 1, offset, 0); } + ssize_t pwrite(const void *buf, size_t count, off_t offset) { struct iovec iov { (void *)buf, count }; - return pwritev_mutable(&iov, 1, offset); + return do_pwritev2(&iov, 1, offset, 0); } - // offset + size must <= origin file size - virtual std::pair queryRefillRange(off_t offset, size_t size) = 0; - - virtual int fstat(struct stat *buf) = 0; - - virtual std::string_view get_pathname() { - return f_name_; - }; - - virtual void set_pathname(std::string_view pathname) { - f_name_ = pathname; + std::string_view get_src_name() { + return src_name_; } - - virtual uint32_t 
get_refcount() { - return ref_count; + void set_src_name(std::string_view pathname) { + src_name_ = pathname.data(); } - - virtual void set_pool(ICachePool *pool) { + std::string_view get_store_key() { + return store_key_; + } + void set_store_key(std::string_view pathname) { + store_key_ = pathname; + } + void set_pool(ICachePool *pool) { pool_ = pool; } + void set_cached_size(off_t cached_size); + off_t get_actual_size() { + return actual_size_; + } + void set_actual_size(off_t actual_size) { + actual_size_ = actual_size; + } + void set_open_flags(int open_flags) { + open_flags_ = open_flags; + } + int open_src_file(photon::fs::IFile **src_file = nullptr); + void set_src_file(photon::fs::IFile *src_file) { + src_file_ = src_file; + } + photon::fs::IFileSystem *get_src_fs() { + return src_fs_; + } + void set_src_fs(photon::fs::IFileSystem *src_fs) { + src_fs_ = src_fs; + } + size_t get_page_size() { + return page_size_; + } + void set_page_size(size_t page_size) { + page_size_ = page_size; + } + IOAlloc *get_allocator() { + return allocator_; + } + void set_allocator(IOAlloc *allocator) { + allocator_ = allocator; + } + + struct try_preadv_result { + size_t iov_sum; // sum of the iovec[] + size_t refill_size; // size in bytes to refill, 0 means cache hit + union { + off_t refill_offset; // the offset to fill, if not hit + ssize_t size; // the return value of preadv(), if hit + }; + }; + virtual try_preadv_result try_preadv2(const struct iovec *iov, int iovcnt, off_t offset, + int flags); + virtual ssize_t do_preadv2(const struct iovec *iov, int iovcnt, off_t offset, int flags); + virtual ssize_t do_preadv2_mutable(struct iovec *iov, int iovcnt, off_t offset, int flags); + virtual ssize_t do_pwritev2(const struct iovec *iov, int iovcnt, off_t offset, int flags); + virtual ssize_t do_pwritev2_mutable(struct iovec *iov, int iovcnt, off_t offset, int flags); + +private: + ssize_t pwritev2_extend(const struct iovec *iov, int iovcnt, off_t offset, int flags); + ssize_t 
do_refill_range(uint64_t refill_off, uint64_t refill_size, size_t count, + IOVector *input = nullptr, off_t offset = 0, int flags = 0); + int tryget_size(); + static void *async_refill(void *args); protected: - uint32_t ref_count = 0; // store's referring count - std::string_view f_name_; - ICachePool *pool_; - ~ICacheStore(){}; + std::string src_name_; + std::string_view store_key_; + ICachePool *pool_ = nullptr; + off_t cached_size_ = 0; + off_t actual_size_ = 0; + int open_flags_ = 0; + std::atomic ref_{0}; + photon::fs::IFile *src_file_ = nullptr; + photon::fs::IFileSystem *src_fs_ = nullptr; + size_t page_size_ = 4096; + IOAlloc *allocator_ = nullptr; + RangeLock range_lock_; + photon::mutex open_lock_; + friend class ICachePool; }; class IMemCacheStore : public ICacheStore { public: - // Get the internal buffer for the specified LBA range (usually aligned), - // which will remain valid for user until released by unpin_buffer(). - // Will allocate pages for missed ranges. - // Will refill / fetch / load data from source if `refill`. - // Concurrent R/W to a same range are guaranteed to work, but considered - // a race-condition and the result is undefiend. - // returns # of bytes actually got, or <0 for failures - virtual ssize_t pin_buffer(off_t offset, size_t count, /*OUT*/ iovector *iov) = 0; - - // Release buffers got from pin_buffer(), - // and the buffer is no longer valid for user. 
- // return 0 for success, < 0 for failures - virtual int unpin_buffer(off_t offset, size_t count) = 0; + virtual ssize_t pin_buffer(off_t offset, size_t count, int flags, /*OUT*/ iovector *iov, + void **pin_result) = 0; + + virtual int unpin_buffer(void *pin_result) = 0; +}; + +class IMemCachePool : public ICachePool { +public: + using ICachePool::ICachePool; + + virtual ssize_t pin_buffer(uint64_t handle, off_t offset, size_t count, int flags, + /*OUT*/ iovector *iov, void **pin_result) = 0; + + virtual int unpin_buffer(void *pin_result) = 0; }; } // namespace FileSystem diff --git a/src/overlaybd/cache/store.cpp b/src/overlaybd/cache/store.cpp new file mode 100644 index 00000000..9ae2ba28 --- /dev/null +++ b/src/overlaybd/cache/store.cpp @@ -0,0 +1,427 @@ +/* + Copyright The Overlaybd Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ +#include "pool_store.h" +#include "cache.h" +#include +#include +#include +#include +#include +#include +#include + +using namespace FileSystem; +using namespace photon::fs; + +namespace FileSystem { + +static const uint32_t MAX_REFILLING = 128; + +ICacheStore::~ICacheStore() { + delete src_file_; +} + +ssize_t ICacheStore::preadv2(const struct iovec *iov, int iovcnt, off_t offset, int flags) { + if (offset < 0) + LOG_ERROR_RETURN(EINVAL, -1, "offset is invalid, offset : `", offset); + iovector_view view(const_cast(iov), iovcnt); + size_t iov_size = view.sum(); + if (0u == iov_size) + return 0; + if (offset >= actual_size_ || offset + static_cast(iov_size) > actual_size_) { + if (tryget_size() != 0) { + LOG_ERROR_RETURN(0, -1, "try get size failed, actual_size_ : `, offset : `, count : `", + actual_size_, offset, iov_size); + } + } + + if (offset >= actual_size_) + return 0; + IOVector input(iov, iovcnt); + if (offset + static_cast(iov_size) > actual_size_) { + input.extract_back(offset + static_cast(iov_size) - actual_size_); + iov_size = actual_size_ - offset; + } + + if ((flags & RW_V2_CACHE_ONLY) || (open_flags_ & O_CACHE_ONLY)) { + auto tr = try_preadv2(input.iovec(), input.iovcnt(), offset, flags); + if (tr.refill_size == 0 && tr.size >= 0) { + return tr.size; + } else { + return -1; + } + } + +again: + auto tr = try_preadv2(input.iovec(), input.iovcnt(), offset, flags); + if (tr.refill_size == 0 && tr.size >= 0) + return tr.size; + // open src file only when cache miss + if (open_src_file() != 0 || !src_file_) { + LOG_ERROR_RETURN(0, -1, "cache preadv2 failed, offset : `, count : `, flags : `", offset, + iov_size, flags); + } + + if (tr.refill_offset < 0) { + SCOPE_AUDIT("download", AU_FILEOP(get_src_name(), offset, tr.size)); + tr.size = src_file_->preadv2(input.iovec(), input.iovcnt(), offset, flags); + return tr.size; + } + + ssize_t ret = + do_refill_range(tr.refill_offset, tr.refill_size, iov_size, &input, offset, flags); + if (ret == -EAGAIN) + 
goto again; + return ret; +} + +ssize_t ICacheStore::pwritev2(const struct iovec *iov, int iovcnt, off_t offset, int flags) { + if (open_flags_ & (O_WRITE_THROUGH | O_CACHE_ONLY | O_WRITE_BACK)) { + return pwritev2_extend(iov, iovcnt, offset, flags); + } + + iovector_view view(const_cast(iov), iovcnt); + size_t size = view.sum(); + if (offset >= actual_size_ || offset + static_cast(size) > actual_size_) { + if (tryget_size() < 0) { + LOG_ERROR_RETURN(0, -1, "try get size failed, actual_size_ : `, offset : `, count : `", + actual_size_, offset, size); + } + } + + if (offset >= actual_size_) + return 0; + if (offset % page_size_ != 0 || + (size % page_size_ != 0 && offset + static_cast(size) < actual_size_)) { + LOG_ERROR_RETURN(EINVAL, -1, "size or offset is not aligned to `, size : `, offset : `", + page_size_, size, offset); + } + + if (offset + static_cast(size) <= actual_size_) { + return do_pwritev2(iov, iovcnt, offset, flags); + } + + IOVector io_vector(iov, iovcnt); + if (offset + static_cast(size) > actual_size_) { + auto ret = io_vector.extract_back(size - (actual_size_ - offset)); + if (ret != size - (actual_size_ - offset)) + LOG_ERRNO_RETURN(EINVAL, -1, "extract failed, extractSize : `, expected : ", ret, + size - (actual_size_ - offset)) + } + + auto write = do_pwritev2(io_vector.iovec(), io_vector.iovcnt(), offset, flags); + if (write != static_cast(io_vector.sum())) { + if (ENOSPC != errno) + LOG_ERROR( + "cache file write failed : `, error : `, actual_size_ : `, offset : `, sum : `", + write, ERRNO(errno), actual_size_, offset, io_vector.sum()); + } + + return write; +} + +ssize_t ICacheStore::try_refill_range(off_t offset, size_t count) { + if (offset >= actual_size_ || offset + static_cast(count) > actual_size_) { + if (tryget_size() != 0) { + LOG_ERROR_RETURN(0, -1, "try get size failed, actual_size_ : `, offset : `, count : `", + actual_size_, offset, count); + } + } + + if (offset >= actual_size_) + return 0; + if (offset + static_cast(count) > 
actual_size_) { + count = actual_size_ - offset; + } + +again: + auto qres = queryRefillRange(offset, count); + if (qres.first < 0) + return -1; + if (qres.second == 0) + return static_cast(count); + // open src file only when cache miss + if (open_src_file() != 0 || !src_file_) { + LOG_ERROR_RETURN(0, -1, + "try refill_range failed due to null src file, offset : `, count : `", + offset, count); + } + + ssize_t ret = do_refill_range(qres.first, qres.second, count); + if (ret == -EAGAIN) + goto again; + return ret; +} + +struct RefillContext { + ICacheStore *store; + IOVector buffer; + uint64_t refill_off; + uint64_t refill_size; + int flags; +}; + +void *ICacheStore::async_refill(void *args) { + auto ctx = (RefillContext *)args; + auto write = ctx->store->do_pwritev2(ctx->buffer.iovec(), ctx->buffer.iovcnt(), ctx->refill_off, + ctx->flags); + if (write != static_cast(ctx->refill_size)) { + if (ENOSPC != errno) + LOG_ERROR( + "cache file write failed : `, error : `, actual_size_ : `, offset : `, sum : `", + write, ERRNO(errno), ctx->store->actual_size_, ctx->refill_off, ctx->buffer.sum()); + } + + ctx->store->pool_->m_refilling.fetch_sub(1, std::memory_order_relaxed); + ctx->store->range_lock_.unlock(ctx->refill_off, ctx->refill_size); + ctx->store->release(); + photon::thread_migrate(photon::CURRENT, + static_cast(ctx->store->pool_->m_vcpu)); + delete ctx; + return nullptr; +} + +ssize_t ICacheStore::do_refill_range(uint64_t refill_off, uint64_t refill_size, size_t count, + IOVector *input, off_t offset, int flags) { + ssize_t ret = 0; + if (input && pool_ && + pool_->m_refilling.load(std::memory_order_relaxed) > pool_->m_refilling_threshold) { + SCOPE_AUDIT("download", AU_FILEOP(get_src_name(), offset, ret)); + ret = src_file_->preadv2(input->iovec(), input->iovcnt(), offset, flags); + return ret; + } + + if (refill_off + refill_size > static_cast(actual_size_)) { + refill_size = actual_size_ - refill_off; + } + + ret = range_lock_.try_lock_wait(refill_off, 
refill_size); + if (ret < 0) + return -EAGAIN; + { + static uint32_t max_refilling = pool_ ? pool_->m_max_refilling : MAX_REFILLING; + uint32_t refilling = max_refilling; + DEFER({ + if (refilling >= max_refilling) + range_lock_.unlock(refill_off, refill_size); + }); + IOVector buffer(*allocator_); + auto alloc = buffer.push_back(refill_size); + if (alloc < refill_size) { + LOG_ERROR("memory allocate failed, refill_size:`, alloc:`", refill_size, alloc); + if (input) { + SCOPE_AUDIT("download", AU_FILEOP(get_src_name(), offset, ret)); + ret = src_file_->preadv2(input->iovec(), input->iovcnt(), offset, flags); + return ret; + } else + return -1; + } + + { + SCOPE_AUDIT("download", AU_FILEOP(get_src_name(), refill_off, ret)); + ret = src_file_->preadv2(buffer.iovec(), buffer.iovcnt(), refill_off, flags); + } + + if (ret != static_cast(refill_size)) { + LOG_ERRNO_RETURN( + 0, -1, + "src file read failed, read : `, expectRead : `, actual_size_ : `, offset : `, sum : `", + ret, refill_size, actual_size_, refill_off, buffer.sum()); + } + + // buffer need async refill + IOVector refill_buf(buffer.iovec(), buffer.iovcnt()); + if (input && (off_t)refill_off <= offset) { + auto view = input->view(); + refill_buf.extract_front(offset - refill_off); + ret = refill_buf.memcpy_to(&view, count); + offset += ret; + } else if (input && refill_off + refill_size >= offset + count) { + iovector_view tail_iov; + tail_iov.iovcnt = 0; + input->slice(count - (refill_off - offset), refill_off - offset, &tail_iov); + ret = refill_buf.memcpy_to(&tail_iov); + input->extract_back(ret); + } else + ret = 0; + + if (input && pool_ && pool_->m_thread_pool && + (refilling = pool_->m_refilling.load(std::memory_order_relaxed)) < + pool_->m_max_refilling) { + pool_->m_refilling.fetch_add(1, std::memory_order_relaxed); + ref_.fetch_add(1, std::memory_order_relaxed); + auto ctx = new RefillContext{this, std::move(buffer), refill_off, refill_size, flags}; + auto th = static_cast(pool_->m_thread_pool) + 
->thread_create(&async_refill, ctx); + photon::thread_migrate(th, photon::get_vcpu()); + } else { + auto write = do_pwritev2(buffer.iovec(), buffer.iovcnt(), refill_off, flags); + if (write != static_cast(refill_size)) { + if (ENOSPC != errno) + LOG_ERROR( + "cache file write failed : `, error : `, actual_size_ : `, offset : `, sum : `", + write, ERRNO(errno), actual_size_, refill_off, buffer.sum()); + if (!input) + return -1; + } + } + } + + if (input && ret != (ssize_t)count) { + auto tr = try_preadv2(input->iovec(), input->iovcnt(), offset, flags); + if (tr.refill_size != 0 || tr.size < 0) { + SCOPE_AUDIT("download", AU_FILEOP(get_src_name(), offset, tr.size)); + tr.size = src_file_->preadv2(input->iovec(), input->iovcnt(), offset, flags); + if (tr.size + ret != static_cast(count)) + LOG_ERRNO_RETURN(0, -1, "read failed, ret:`, offset:`,sum:`,actual_size_:`", + tr.size, offset, input->sum(), actual_size_); + } + } + + return count; +} + +void ICacheStore::set_cached_size(off_t cached_size) { + if (cached_size_ == 0) { + cached_size_ = cached_size; + } else if (cached_size > cached_size_) { + off_t last = cached_size_ / page_size_ * page_size_; + if (last != cached_size_) + evict(last); + cached_size_ = last; + } else if (cached_size < cached_size_) { + off_t last = cached_size / page_size_ * page_size_; + evict(last); + cached_size_ = last; + } +} + +ICacheStore::try_preadv_result ICacheStore::try_preadv2(const struct iovec *iov, int iovcnt, + off_t offset, int flags) { + try_preadv_result rst; + iovector_view view((iovec *)iov, iovcnt); + rst.iov_sum = view.sum(); + auto q = queryRefillRange(offset, rst.iov_sum); + if (q.first >= 0 && q.second == 0) { // no need to refill + rst.refill_size = 0; + rst.size = do_preadv2(iov, iovcnt, offset, flags); + if (rst.size != (ssize_t)rst.iov_sum) { + rst.refill_size = (size_t)-1; + rst.refill_offset = -1; + } + } else { + rst.refill_size = q.second; + rst.refill_offset = q.first; + } + + return rst; +} + +ssize_t 
ICacheStore::do_preadv2(const struct iovec *iov, int iovcnt, off_t offset, int flags) { + SmartCloneIOV<32> ciov(iov, iovcnt); + return do_preadv2_mutable(ciov.iov, iovcnt, offset, flags); +} + +ssize_t ICacheStore::do_preadv2_mutable(struct iovec *iov, int iovcnt, off_t offset, int flags) { + return do_preadv2(iov, iovcnt, offset, flags); +} + +ssize_t ICacheStore::do_pwritev2(const struct iovec *iov, int iovcnt, off_t offset, int flags) { + SmartCloneIOV<32> ciov(iov, iovcnt); + return do_pwritev2_mutable(ciov.iov, iovcnt, offset, flags); +} + +ssize_t ICacheStore::do_pwritev2_mutable(struct iovec *iov, int iovcnt, off_t offset, int flags) { + return do_pwritev2(iov, iovcnt, offset, flags); +} + +int ICacheStore::open_src_file(IFile **src_file) { + if (!src_fs_ || (open_flags_ & O_CACHE_ONLY)) { + if (src_file) + *src_file = src_file_; + return 0; + } + photon::scoped_lock l(open_lock_); + if (src_file_) { + if (src_file) + *src_file = src_file_; + return 0; + } + int flags = O_RDONLY; + if (open_flags_ & (O_WRITE_THROUGH | O_WRITE_BACK)) + flags |= O_CREAT; + src_file_ = src_fs_->open(src_name_.c_str(), flags); + if (!src_file_) + LOG_ERRNO_RETURN(0, -1, "open source ` failed", src_name_.c_str()); + if (src_file) + *src_file = src_file_; + return 0; +} + +ssize_t ICacheStore::pwritev2_extend(const struct iovec *iov, int iovcnt, off_t offset, int flags) { + iovector_view view(const_cast(iov), iovcnt); + size_t size = view.sum(); + if (offset % page_size_ != 0) { + LOG_ERROR_RETURN(EINVAL, -1, "offset is not aligned to `, size : `, offset : `", page_size_, + size, offset); + } + + // append only + if (offset + (off_t)size > cached_size_) { + off_t last = cached_size_ / page_size_ * page_size_; + if (last != cached_size_) { + evict(last); + cached_size_ = last; + actual_size_ = cached_size_; + } + } + + auto write = do_pwritev2(iov, iovcnt, offset, flags); + if (write != static_cast(size)) { + if (ENOSPC != errno) + LOG_ERROR( + "cache file write failed : `, error 
: `, actual_size_ : `, offset : `, sum : `", + write, ERRNO(errno), actual_size_, offset, size); + } + + // append only + if (write > 0 && offset + write > cached_size_) { + cached_size_ = offset + write; + if (actual_size_ < cached_size_) { + actual_size_ = cached_size_; + } + } + + return write; +} + +int ICacheStore::tryget_size() { + if (actual_size_ % page_size_ != 0) + return 0; + if (open_src_file() != 0) + return -1; + struct stat buf; + buf.st_size = 0; + if ((src_file_ && src_file_->fstat(&buf) != 0) || (!src_file_ && fstat(&buf) != 0)) + return -1; + if (buf.st_size != actual_size_) { + set_cached_size(buf.st_size); + actual_size_ = buf.st_size; + } + return 0; +} + +} // namespace FileSystem diff --git a/src/overlaybd/cache/full_file_cache/test/CMakeLists.txt b/src/overlaybd/cache/test/CMakeLists.txt similarity index 99% rename from src/overlaybd/cache/full_file_cache/test/CMakeLists.txt rename to src/overlaybd/cache/test/CMakeLists.txt index 6d57d2be..768642e4 100644 --- a/src/overlaybd/cache/full_file_cache/test/CMakeLists.txt +++ b/src/overlaybd/cache/test/CMakeLists.txt @@ -11,5 +11,4 @@ target_link_libraries(cache_test gtest gtest_main gflags pthread photon_static o add_test( NAME cache_test COMMAND ${EXECUTABLE_OUTPUT_PATH}/cache_test -) - +) \ No newline at end of file diff --git a/src/overlaybd/cache/test/cache_test.cpp b/src/overlaybd/cache/test/cache_test.cpp new file mode 100644 index 00000000..d0c0553b --- /dev/null +++ b/src/overlaybd/cache/test/cache_test.cpp @@ -0,0 +1,558 @@ +/* + Copyright The Overlaybd Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "photon/common/alog.h" +#include "photon/common/callback.h" +#include "photon/fs/localfs.h" +#include "photon/fs/aligned-file.h" +#include "photon/thread/thread.h" +#include "photon/io/fd-events.h" +#include "photon/io/aio-wrapper.h" +#include "photon/common/io-alloc.h" +#include "../cache.h" +#include "random_generator.h" + +namespace Cache { + +using namespace FileSystem; +using namespace photon::fs; + +// Cleanup and recreate the test dir +inline void SetupTestDir(const std::string& dir) { + std::string cmd = std::string("rm -r ") + dir; + system(cmd.c_str()); + cmd = std::string("mkdir -p ") + dir; + system(cmd.c_str()); +} + +void commonTest(bool cacheIsFull, bool enableDirControl, bool dirFull) { + std::string prefix = ""; + const size_t dirQuota = 32ul * 1024 * 1024; + const uint64_t refillSize = 1024 * 1024; + if (enableDirControl) { + prefix = "/John/bucket/"; + } + + std::string root("/tmp/ease/cache/cache_test/"); + SetupTestDir(root); + + std::string subDir = prefix + "dir/dir/"; + SetupTestDir(root + subDir); + std::system(std::string("touch " + root + subDir + "testFile").c_str()); + + struct stat st; + auto ok = ::stat(std::string(root + subDir + "testFile").c_str(), &st); + EXPECT_EQ(0, ok); + + std::string srcRoot("/tmp/ease/cache/src_test/"); + SetupTestDir(srcRoot); + auto srcFs = new_localfs_adaptor(srcRoot.c_str(), ioengine_psync); + + auto mediaFs = new_localfs_adaptor(root.c_str(), ioengine_libaio); + auto alignFs = 
new_aligned_fs_adaptor(mediaFs, 4 * 1024, true, true); + auto cacheAllocator = new AlignedAlloc(4 * 1024); + auto roCachedFs = new_full_file_cached_fs(srcFs, alignFs, refillSize, + cacheIsFull ? 0 : 512, 1000 * 1000 * 1, 128ul * 1024 * 1024, cacheAllocator, enableDirControl ? 2 : 0); + auto cachePool = roCachedFs->get_pool(); + + if (dirFull) { + cachePool->set_quota(prefix, dirQuota); + } + SetupTestDir(srcRoot + prefix + "testDir"); + auto srcFile = srcFs->open(std::string(prefix + "/testDir/file_1").c_str(), + O_RDWR|O_CREAT|O_TRUNC, 0644); + + UniformCharRandomGen gen(0, 255); + off_t offset = 0; + uint32_t kPageSize = 4 * 1024; + uint32_t kFileSize = kPageSize * 16384; // 64MB + uint32_t kPageCount = kFileSize / kPageSize; + for (uint32_t i = 0; i < kPageCount; ++i) { + std::vector data; + for (uint32_t j = 0; j < kPageSize; ++j) { + data.push_back(gen.next()); + } + srcFile->pwrite(data.data(), data.size(), offset); + offset += kPageSize; + } + + // write some unaligned + off_t lastOffset = offset; + off_t unAlignedLen = 750; + { + std::vector data; + for (uint32_t j = 0; j < kPageSize; ++j) { + data.push_back(gen.next()); + } + srcFile->pwrite(data.data(), unAlignedLen, offset); + } + + auto cachedFile = static_cast(roCachedFs->open( + std::string(prefix + "/testDir/file_1").c_str(), 0, 0644)); + + // test unaligned block + { + void* buf = malloc(kPageSize); + auto ret = cachedFile->pread(buf, kPageSize, lastOffset); + + std::vector src; + src.reserve(kPageSize); + auto retSrc = srcFile->pread(src.data(), kPageSize, lastOffset); + + EXPECT_EQ(0, std::memcmp(buf, src.data(), unAlignedLen)); + EXPECT_EQ(unAlignedLen, retSrc); + EXPECT_EQ(unAlignedLen, ret); + + LOG_INFO("read again"); + + // read again + ret = cachedFile->pread(buf, kPageSize, lastOffset); + EXPECT_EQ(unAlignedLen, ret); + + free(buf); + } + + // test aligned and unaligned block + { + void* buf = malloc(kPageSize * 4); + auto ret = cachedFile->pread(buf, kPageSize * 4, lastOffset - 2 * 
kPageSize); + + std::vector src; + src.reserve(kPageSize * 4); + auto retSrc = srcFile->pread(src.data(), kPageSize * 4, lastOffset - 2 * kPageSize); + + EXPECT_EQ(0, std::memcmp(buf, src.data(), 2 * kPageSize + unAlignedLen)); + EXPECT_EQ(2 * kPageSize + unAlignedLen, retSrc); + EXPECT_EQ(2 * kPageSize + unAlignedLen, ret); + + LOG_INFO("read again"); + + // read again + ret = cachedFile->pread(buf, kPageSize * 4, lastOffset - 2 * kPageSize); + EXPECT_EQ(2 * kPageSize + unAlignedLen, ret); + + free(buf); + } + + std::vector readBuf; + readBuf.reserve(kPageSize); + std::vector readSrcBuf; + readSrcBuf.reserve(kPageSize); + for (int i = 0; i != 5; ++i) { + EXPECT_EQ(kPageSize, cachedFile->read(readBuf.data(), kPageSize)); + srcFile->read(readSrcBuf.data(), kPageSize); + EXPECT_EQ(0, std::memcmp(readBuf.data(), readSrcBuf.data(), kPageSize)); + } + + if (enableDirControl && !cacheIsFull) { + CacheStat cstat = {}; + EXPECT_EQ(0, cachePool->stat(&cstat, std::string(prefix + "/testDir/file_1").c_str())); + EXPECT_EQ(kFileSize / refillSize, cstat.total_size); + cstat = {}; + EXPECT_EQ(0, cachedFile->get_store()->stat(&cstat)); + EXPECT_EQ(kFileSize / refillSize, cstat.total_size); + } + + // test refill(3) + if (!cacheIsFull) { + auto inSrcFile = cachedFile->get_source(); + cachedFile->set_source(nullptr); + struct stat stat; + inSrcFile->fstat(&stat); + cachedFile->ftruncate(stat.st_size); + void* buf = malloc(kPageSize * 3); + DEFER(free(buf)); + std::vector src; + src.reserve(kPageSize * 3); + EXPECT_EQ(kPageSize, srcFile->pread(src.data(), kPageSize, 0)); + memcpy(buf, src.data(), kPageSize); + + EXPECT_EQ(kPageSize, cachedFile->refill(buf, kPageSize, 0)); + + memset(buf, 0, kPageSize); + EXPECT_EQ(kPageSize, cachedFile->pread(buf, kPageSize, 0)); + EXPECT_EQ(0, memcmp(buf, src.data(), kPageSize)); + + struct stat st1; + ::stat(std::string(root + prefix + "/testDir/file_1").c_str(), &st1); + EXPECT_EQ(0, cachedFile->evict(0, kPageSize)); + struct stat st2; + 
::stat(std::string(root + prefix + "/testDir/file_1").c_str(), &st2); + EXPECT_EQ(kPageSize, st1.st_blocks * 512 - st2.st_blocks * 512); + + // test refill last block + src.clear(); + EXPECT_EQ(kPageSize + unAlignedLen, srcFile->pread(src.data(), kPageSize * 3, lastOffset - kPageSize)); + memcpy(buf, src.data(), kPageSize * 3); + EXPECT_EQ(kPageSize + unAlignedLen, cachedFile->refill(buf, kPageSize * 3, lastOffset - kPageSize)); + memset(buf, 0, kPageSize * 3); + EXPECT_EQ(kPageSize + unAlignedLen, cachedFile->pread(buf, kPageSize * 3, lastOffset - kPageSize)); + EXPECT_EQ(0, memcmp(buf, src.data(), kPageSize + unAlignedLen)); + + cachedFile->set_source(inSrcFile); + } + + // test refill(2) + if (!cacheIsFull) { + auto inSrcFile = cachedFile->get_source(); + + void* buf = malloc(kPageSize * 2); + DEFER(free(buf)); + EXPECT_EQ(0, cachedFile->refill(kPageSize, 2 * kPageSize)); + + cachedFile->set_source(nullptr); + EXPECT_EQ(2 * kPageSize, cachedFile->pread(buf, 2 * kPageSize, kPageSize)); + std::vector src; + src.reserve(kPageSize * 2); + EXPECT_EQ(kPageSize * 2, srcFile->pread(src.data(), 2 * kPageSize, kPageSize)); + EXPECT_EQ(0, memcmp(buf, src.data(), 2 * kPageSize)); + cachedFile->set_source(inSrcFile); + + // prefetch more than 16MB + EXPECT_EQ(0, cachedFile->fadvise(234, 5000 * kPageSize, POSIX_FADV_WILLNEED)); + // prefetch tail + EXPECT_EQ(0, cachedFile->fadvise(lastOffset - kPageSize, 5000 * kPageSize, POSIX_FADV_WILLNEED)); + } + + if (dirFull) { + CacheStat cstat = {}; + EXPECT_EQ(0, cachePool->stat(&cstat, prefix)); + EXPECT_EQ(dirQuota / refillSize, cstat.total_size); + } + + // test aligned section + UniformInt32RandomGen genOffset(0, (kPageCount + 1) * kPageSize); + UniformInt32RandomGen genSize(0, 8 * kPageSize); + struct stat srcSt = {}; + srcFile->fstat(&srcSt); + for (int i = 0; i != 10000; ++i) { + auto tmpOffset = genOffset.next(); + auto size = genSize.next(); + + if (tmpOffset >= srcSt.st_size) { + size = 0; + } else { + size = tmpOffset + 
size > srcSt.st_size ? srcSt.st_size - tmpOffset : size; + } + void* buf = malloc(size); + auto ret = cachedFile->pread(buf, size, tmpOffset); + + std::vector src; + src.reserve(size); + auto retSrc = srcFile->pread(src.data(), size, tmpOffset); + + EXPECT_EQ(0, std::memcmp(buf, src.data(), size)); + EXPECT_EQ(size, retSrc); + EXPECT_EQ(size, ret); + free(buf); + + if (9900 == i && dirFull) { + cachedFile->get_store()->set_quota(0); + } + } + srcFile->close(); + + photon::thread_usleep(1000 * 1000ull); + ok = ::stat(std::string(root + subDir + "testFile").c_str(), &st); + EXPECT_EQ(cacheIsFull || dirFull ? -1 : 0, ok); + + if (enableDirControl) { + auto ret = cachePool->evict(std::string(prefix + "/testDir").c_str()); + EXPECT_EQ(0, ret); + } + + delete cachedFile; + + // test smaller file + { + auto smallFile = srcFs->open(std::string(prefix + "/testDir/small").c_str(), + O_RDWR|O_CREAT|O_TRUNC, 0644); + DEFER(delete smallFile); + int smallSize = 102; + std::vector smallData; + for (int i = 0; i != smallSize; ++i) { + smallData.push_back(gen.next()); + } + EXPECT_EQ(smallSize, smallFile->pwrite(smallData.data(), smallData.size(), 0)); + + auto smallCache = static_cast(roCachedFs->open( + std::string(prefix + "/testDir/small").c_str(), 0, 0644)); + DEFER(delete smallCache); + + void* sBuffer = malloc(kPageSize); + DEFER(free(sBuffer)); + EXPECT_EQ(smallSize, smallCache->pread(sBuffer, kPageSize, 0)); + EXPECT_EQ(0, std::memcmp(sBuffer, smallData.data(), smallSize)); + + memset(sBuffer, 0, kPageSize); + EXPECT_EQ(smallSize, smallCache->pread(sBuffer, kPageSize, 0)); + EXPECT_EQ(0, std::memcmp(sBuffer, smallData.data(), smallSize)); + + smallFile->close(); + } + + // test refill + { + auto refillFile = srcFs->open(std::string(prefix + "/testDir/refill").c_str(), + O_RDWR|O_CREAT|O_TRUNC, 0644); + DEFER(delete refillFile); + int refillSize = 4097; + std::vector refillData; + for (int i = 0; i != refillSize; ++i) { + refillData.push_back(gen.next()); + } + 
EXPECT_EQ(refillSize, refillFile->pwrite(refillData.data(), refillData.size(), 0)); + + auto refillCache = static_cast(roCachedFs->open( + std::string(prefix + "/testDir/refill").c_str(), 0, 0644)); + DEFER(delete refillCache); + + void* sBuffer = malloc(kPageSize * 2); + DEFER(free(sBuffer)); + memset(sBuffer, 0, kPageSize * 2); + EXPECT_EQ(kPageSize, refillCache->pread(sBuffer, kPageSize, 0)); + EXPECT_EQ(0, std::memcmp(sBuffer, refillData.data(), kPageSize)); + + memset(sBuffer, 0, kPageSize * 2); + EXPECT_EQ(refillSize, refillCache->pread(sBuffer, kPageSize * 2, 0)); + EXPECT_EQ(0, std::memcmp(sBuffer, refillData.data(), refillSize)); + + refillFile->close(); + } + + delete srcFs; + delete roCachedFs; +} + +TEST(RoCachedFs, Basic) { + commonTest(false, false, false); +} + +TEST(RoCachedFs, BasicCacheFull) { + commonTest(true, false, false); +} + +// TEST(RoCachedFs, BasicWithDirControl) { +// commonTest(false, true, false); +// } + +// TEST(RoCachedFs, BasicCacheFullWithDirControl) { +// commonTest(true, true, false); +// } + +TEST(RoCachedFs, CacheWithOutSrcFile) { + std::string root("/tmp/ease/cache/cache_test_no_src/"); + SetupTestDir(root); + + auto mediaFs = new_localfs_adaptor(root.c_str(), ioengine_libaio); + auto alignFs = new_aligned_fs_adaptor(mediaFs, 4 * 1024, true, true); + auto cacheAllocator = new AlignedAlloc(4 * 1024); + DEFER(delete cacheAllocator); + auto roCachedFs = new_full_file_cached_fs(nullptr, alignFs, 1024 * 1024, + 512, 1000 * 1000 * 1, 128ul * 1024 * 1024, cacheAllocator, 0); + DEFER(delete roCachedFs); + auto cachedFile = static_cast(roCachedFs->open( + std::string("/testDir/file_1").c_str(), 0, 0644)); + DEFER(delete cachedFile); + + cachedFile->ftruncate(1024 * 1024); + std::vector buf; + int len = 8 * 1024; + buf.reserve(len); + EXPECT_EQ(len, cachedFile->pwrite(buf.data(), len, 4 * 1024)); + EXPECT_EQ(len / 2, cachedFile->pread(buf.data(), 4 * 1024, 4 * 1024)); + EXPECT_EQ(-1, cachedFile->pread(buf.data(), len, 0)); + + auto 
writeFile = static_cast(roCachedFs->open( + std::string("/testDir/file_2").c_str(), 0, 0644)); + DEFER(delete writeFile); + writeFile->ftruncate(1024 * 1024); + buf.assign(len, 'a'); + EXPECT_EQ(len, writeFile->write(buf.data(), len)); + EXPECT_EQ(len, writeFile->write(buf.data(), len)); + std::vector res; + res.reserve(len); + EXPECT_EQ(len, writeFile->pread(res.data(), len, 0)); + EXPECT_EQ(0, std::memcmp(buf.data(), res.data(), len)); + res.assign(len, '0'); + EXPECT_EQ(len, writeFile->pread(res.data(), len, len)); + EXPECT_EQ(0, std::memcmp(buf.data(), res.data(), len)); + EXPECT_EQ(-1, writeFile->pread(res.data(), len, len * 2)); +} + +TEST(RoCachedFS, xattr) { + std::string root("/tmp/ease/cache/cache_xattr/"); + SetupTestDir(root); + + auto srcFs = new_localfs_adaptor(); + auto mediaFs = new_localfs_adaptor(root.c_str()); + auto roCachedFs = new_full_file_cached_fs(srcFs, mediaFs, 1024 * 1024, 512, 1000 * 1000 * 1, + 128ul * 1024 * 1024, nullptr, 0); + DEFER(delete roCachedFs); + + std::string path = "/tmp/ease/cache/cache_xattr/filexattr"; + auto xttarFile = srcFs->open(path.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0644); + DEFER(delete xttarFile); + auto xattrFs = dynamic_cast(roCachedFs); + std::string name = "user.testxattr", value = "yes"; + char key[20], val[20]; + auto ret = xattrFs->setxattr(path.c_str(), name.c_str(), value.c_str(), value.size(), 0); + EXPECT_EQ(0, ret); + ret = xattrFs->listxattr(path.c_str(), key, 20); + EXPECT_EQ(0, std::memcmp(key, name.data(), ret)); + ret = xattrFs->getxattr(path.c_str(), key, val, 20); + EXPECT_EQ(value.size(), ret); + EXPECT_EQ(0, std::memcmp(val, value.data(), ret)); + ret = xattrFs->removexattr(path.c_str(), key); + EXPECT_EQ(0, ret); + + auto cachedFile = static_cast(roCachedFs->open(path.c_str(), 0, 0644)); + DEFER(delete cachedFile); + auto xattrfile = dynamic_cast(cachedFile); + ret = xattrfile->fsetxattr(name.c_str(), value.c_str(), value.size(), 0); + EXPECT_EQ(0, ret); + ret = xattrfile->flistxattr(key, 
20); + EXPECT_EQ(0, std::memcmp(key, name.data(), ret)); + ret = xattrfile->fgetxattr(key, val, 20); + EXPECT_EQ(value.size(), ret); + EXPECT_EQ(0, std::memcmp(val, value.data(), ret)); + ret = xattrfile->fremovexattr(key); + EXPECT_EQ(0, ret); +} + +void* worker(void* arg) { + auto fs = (ICachedFileSystem*)arg; + char buffer[1024*1024]; + char buffersrc[1024*1024]; + std::vector offset; + for (auto i = 0; i < 2048; i++) { + offset.push_back(i * 1024 * 1024); + } + auto fd = ::open("/tmp/ease/cache/src_test/huge", O_RDONLY); + DEFER(::close(fd)); + auto f = fs->open("/huge", O_RDONLY); + DEFER(delete f); + for (int i=0;i<4;i++) { + std::random_shuffle(offset.begin(), offset.end()); + for (const auto &x : offset) { + ::pread(fd, buffersrc, 1024*1024, x); + f->pread(buffer, 1024*1024, x); + EXPECT_EQ(0, memcmp(buffer, buffersrc, 1024*1024)); + fs->get_pool()->evict(); + photon::thread_yield(); + } + } + return nullptr; +} + +TEST(CachedFS, write_while_full) { + std::string srcRoot("/tmp/ease/cache/src_test/"); + SetupTestDir(srcRoot); + system("dd if=/dev/urandom of=/tmp/ease/cache/src_test/huge bs=1M count=2048"); + + std::string root("/tmp/ease/cache/cache_test/"); + SetupTestDir(root); + auto srcFs = new_localfs_adaptor(srcRoot.c_str()); + auto mediaFs = new_localfs_adaptor(root.c_str()); + auto roCachedFs = new_full_file_cached_fs(srcFs, mediaFs, 1024 * 1024, 1, 100 * 1000 * 1, + 128ul * 1024 * 1024, nullptr, 0); + + std::vector jhs; + + for (int i=0;i<2;i++) { + jhs.emplace_back(photon::thread_enable_join(photon::thread_create(worker, roCachedFs))); + } + for (auto &x : jhs) { + photon::thread_join(x); + } +} + +TEST(CachedFS, fn_trans_func) { + std::string srcRoot("/tmp/ease/cache/src_test/"); + SetupTestDir(srcRoot); + system("mkdir /tmp/ease/cache/src_test/path_aaa/"); + system("mkdir /tmp/ease/cache/src_test/path_bbb/"); + system("dd if=/dev/urandom of=/tmp/ease/cache/src_test/path_aaa/sha256:test bs=1K count=1"); + system("cp 
/tmp/ease/cache/src_test/path_aaa/sha256:test /tmp/ease/cache/src_test/path_bbb/sha256:test"); + + std::string root("/tmp/ease/cache/cache_test/"); + SetupTestDir(root); + auto srcFs = new_localfs_adaptor(srcRoot.c_str()); + auto mediaFs = new_localfs_adaptor(root.c_str()); + + struct NameTransCB { + size_t fn_trans_sha256(std::string_view src, char *dest, size_t size) { + auto p = src.find("/sha256:"); + if (p == std::string_view::npos) { + return 0; + } + size_t len = src.size() - p; + if (len > size) { + LOG_WARN("filename length ` exceed `", len, size); + return 0; + } + strcpy(dest, src.data() + p); + return len; + } + }cb; + auto cachedFs = new_full_file_cached_fs(srcFs, mediaFs, 1024 * 1024, 1, 100 * 1000 * 1, + 128ul * 1024 * 1024, nullptr, 0, {&cb, &NameTransCB::fn_trans_sha256}); + char buf1[1024], buf2[1024]; + auto cachedFile1 = static_cast(cachedFs->open("/path_aaa/sha256:test", 0, 0644)); + auto cachedFile2 = static_cast(cachedFs->open("/path_bbb/sha256:test", 0, 0644)); + cachedFile1->read(buf1, 1024); + auto cFile = mediaFs->open("/sha256:test", 0, 0644); + cFile->read(buf2, 1024); + EXPECT_EQ(0, memcmp(buf1, buf2, 1024)); + auto cs1 = cachedFile1->get_store(); + auto cs2 = cachedFile2->get_store(); + EXPECT_EQ(cs1, cs2); +} + +} // namespace Cache + +int main(int argc, char** argv) { + log_output_level = ALOG_ERROR; + photon::vcpu_init(); + ::testing::InitGoogleTest(&argc, argv); + + int ret = photon::fd_events_init(photon::INIT_EVENT_EPOLL); + if (ret < 0) { + LOG_ERROR_RETURN(0, -1, "failed to init epoll subsystem"); + } + + ret = photon::libaio_wrapper_init(); + if (ret < 0) + LOG_ERROR_RETURN(0, -1, "failed to init libaio subsystem"); + + ret = RUN_ALL_TESTS(); + + photon::libaio_wrapper_fini(); + photon::fd_events_fini(); + return ret; +} diff --git a/src/overlaybd/cache/full_file_cache/test/random_generator.h b/src/overlaybd/cache/test/random_generator.h similarity index 52% rename from 
src/overlaybd/cache/full_file_cache/test/random_generator.h rename to src/overlaybd/cache/test/random_generator.h index 4dce7605..8662a1df 100644 --- a/src/overlaybd/cache/full_file_cache/test/random_generator.h +++ b/src/overlaybd/cache/test/random_generator.h @@ -24,44 +24,35 @@ namespace Cache { /* DataTypes for random generator */ template class RandomValueGen { -public: - RandomValueGen() { - } - virtual ~RandomValueGen() { - } - virtual T next() = 0; + public: + RandomValueGen() {} + virtual ~RandomValueGen() {} + virtual T next() = 0; }; // an uniform int random generator that produces inclusive-inclusive value range -class UniformInt32RandomGen : public RandomValueGen { -public: - UniformInt32RandomGen(uint32_t start, uint32_t end, int seed = 1213) - : gen_(seed), dis_(start, end) { - } - uint32_t next() override { - return dis_(gen_); - } - void seed(std::mt19937::result_type val) { - gen_.seed(val); - } - -private: - std::mt19937 gen_; - std::uniform_int_distribution dis_; +class UniformInt32RandomGen: public RandomValueGen { + public: + UniformInt32RandomGen() = default; + UniformInt32RandomGen(uint32_t start, uint32_t end, int seed = 1213) + : gen_(seed), dis_(start, end) {} + uint32_t next() override { return dis_(gen_); } + void seed(std::mt19937::result_type val) { gen_.seed(val); } + + private: + std::mt19937 gen_; + std::uniform_int_distribution dis_; }; class UniformCharRandomGen : public RandomValueGen { -public: - UniformCharRandomGen(uint32_t start, uint32_t end, int seed = 1213) - : gen_(seed), dis_(start, end) { - } - unsigned char next() { - return dis_(gen_); - } - -private: - std::mt19937 gen_; - std::uniform_int_distribution dis_; + public: + UniformCharRandomGen(uint32_t start, uint32_t end, int seed = 1213) + : gen_(seed), dis_(start, end) {} + unsigned char next() { return dis_(gen_); } + + private: + std::mt19937 gen_; + std::uniform_int_distribution dis_; }; -} // namespace Cache +} // namespace Cache diff --git 
a/src/overlaybd/gzindex/test/test.cpp b/src/overlaybd/gzindex/test/test.cpp index 940da52c..887e045e 100644 --- a/src/overlaybd/gzindex/test/test.cpp +++ b/src/overlaybd/gzindex/test/test.cpp @@ -18,7 +18,6 @@ #include "../../cache/pool_store.h" #include "../../cache/full_file_cache/cache_pool.h" #include "../../cache/cache.h" -#include "../../cache/frontend/cached_file.h" #include #include #include @@ -183,9 +182,9 @@ photon::fs::IFile *GzIndexTest::defile = nullptr; photon::fs::IFile *GzIndexTest::gzfile = nullptr; photon::fs::IFile *GzIndexTest::gzdata = nullptr; photon::fs::IFile *GzIndexTest::gzindex = nullptr; -const char *GzIndexTest::fn_defile = "fdata"; -const char *GzIndexTest::fn_gzdata = "fdata.gz"; -const char *GzIndexTest::fn_gzindex = "findex"; +const char *GzIndexTest::fn_defile = "/fdata"; +const char *GzIndexTest::fn_gzdata = "/fdata.gz"; +const char *GzIndexTest::fn_gzindex = "/findex"; TEST_F(GzIndexTest, pread) { std::vector t{ @@ -256,7 +255,8 @@ class GzCacheTest : public ::testing::Test { system(cmd.c_str()); lfs = photon::fs::new_localfs_adaptor("/tmp/gzip_src"); cfs = photon::fs::new_localfs_adaptor("/tmp/gzip_cache"); - pool = new Cache::FileCachePool(cfs, 4, 10000000, (uint64_t)1048576 * 4096, 1024 * 1024); + cfs1 = photon::fs::new_localfs_adaptor("/tmp/gzip_cache"); + pool = new Cache::FileCachePool(cfs1, 4, 10000000, (uint64_t)1048576 * 4096, 1024 * 1024); if (buildDataFile() != 0) { LOG_ERROR("failed to build ` and `", fn_defile, fn_gzdata); @@ -268,7 +268,7 @@ class GzCacheTest : public ::testing::Test { } lfs = FileSystem::new_full_file_cached_fs( lfs, cfs, 1024 * 1024, 1, 10000000, - (uint64_t)1048576 * 4096, nullptr, nullptr); + (uint64_t)1048576 * 4096, nullptr, 0, nullptr); gzdata = lfs->open(fn_gzdata, O_CREAT | O_TRUNC | O_RDWR, 0644); if (gzdata == nullptr) { LOG_ERROR("gzdata create failed"); @@ -288,7 +288,11 @@ class GzCacheTest : public ::testing::Test { exit(-1); } auto io_alloc = new IOAlloc; - gzfile = new 
Cache::CachedFile(gzfile, store, st.st_size, 4 * 1024, 4, io_alloc, nullptr); + store->set_src_file(gzfile); + store->set_page_size(4096); + store->set_allocator(io_alloc); + store->set_actual_size(st.st_size); + gzfile = new_cached_file(store, 4096, nullptr); if (gzfile == nullptr) { LOG_ERROR("failed create new cached gzip file"); @@ -311,6 +315,7 @@ class GzCacheTest : public ::testing::Test { if (lfs->access(fn_gzindex, 0) == 0) { lfs->unlink(fn_gzindex); } + delete pool; delete lfs; } @@ -339,6 +344,7 @@ class GzCacheTest : public ::testing::Test { private: static photon::fs::IFileSystem *lfs; static photon::fs::IFileSystem *cfs; + static photon::fs::IFileSystem *cfs1; static FileSystem::ICachePool *pool; static photon::fs::IFile *gzdata; static photon::fs::IFile *gzindex; @@ -414,14 +420,15 @@ class GzCacheTest : public ::testing::Test { photon::fs::IFileSystem *GzCacheTest::lfs = nullptr; photon::fs::IFileSystem *GzCacheTest::cfs = nullptr; +photon::fs::IFileSystem *GzCacheTest::cfs1 = nullptr; FileSystem::ICachePool *GzCacheTest::pool = nullptr; photon::fs::IFile *GzCacheTest::defile = nullptr; photon::fs::IFile *GzCacheTest::gzfile = nullptr; photon::fs::IFile *GzCacheTest::gzdata = nullptr; photon::fs::IFile *GzCacheTest::gzindex = nullptr; -const char *GzCacheTest::fn_defile = "fdata"; -const char *GzCacheTest::fn_gzdata = "fdata.gz"; -const char *GzCacheTest::fn_gzindex = "findex"; +const char *GzCacheTest::fn_defile = "/fdata"; +const char *GzCacheTest::fn_gzdata = "/fdata.gz"; +const char *GzCacheTest::fn_gzindex = "/findex"; bool check_in_interval(int val, int l, int r) { return l <= val && val < r; diff --git a/src/overlaybd/registryfs/registryfs.cpp b/src/overlaybd/registryfs/registryfs.cpp index d19238f2..70719dfb 100644 --- a/src/overlaybd/registryfs/registryfs.cpp +++ b/src/overlaybd/registryfs/registryfs.cpp @@ -250,7 +250,7 @@ class RegistryFSImpl : public RegistryFS { // unexpected situation if (!scope.empty()) m_scope_token.release(scope, 
true); - LOG_ERROR_RETURN(0, nullptr, "Failed to get actual url ", VALUE(url), VALUE(ret)); + LOG_ERROR_RETURN(0, nullptr, "Failed to get actual url ", VALUE(code), VALUE(url), VALUE(ret)); } virtual int setAccelerateAddress(const char* addr = "") override { @@ -380,7 +380,8 @@ class RegistryFileImpl : public photon::fs::VirtualReadOnlyFile { size_t m_filesize; RegistryFileImpl(const char *filename, const char *url, RegistryFSImpl *fs, uint64_t timeout) - : m_filename(filename), m_url(url), m_fs(fs), m_timeout(timeout) { + : m_filename(filename), m_fs(fs), m_timeout(timeout) { + m_url = url[0] == '/' ? url + 1 : url; m_filesize = 0; } diff --git a/src/overlaybd/registryfs/registryfs_v2.cpp b/src/overlaybd/registryfs/registryfs_v2.cpp index 70abac7d..82baaf22 100644 --- a/src/overlaybd/registryfs/registryfs_v2.cpp +++ b/src/overlaybd/registryfs/registryfs_v2.cpp @@ -378,7 +378,9 @@ class RegistryFileImpl_v2 : public photon::fs::VirtualReadOnlyFile { size_t m_filesize = 0; RegistryFileImpl_v2(const char *url, RegistryFSImpl_v2 *fs, uint64_t timeout) - : m_url(url), m_fs(fs), m_timeout(timeout) {} + : m_fs(fs), m_timeout(timeout) { + m_url = url[0] == '/' ? 
url + 1 : url; + } virtual IFileSystem *filesystem() override { return m_fs; @@ -467,7 +469,7 @@ inline IFile *RegistryFSImpl_v2::open(const char *pathname, int) { int ret = file->fstat(&buf); if (ret < 0) { delete file; - LOG_ERROR_RETURN(0, nullptr, "failed to open and stat registry file `, ret `", pathname, ret); + LOG_ERRNO_RETURN(0, nullptr, "failed to open and stat registry file `, ret `", pathname, ret); } return file; } diff --git a/src/overlaybd/tar/tar_file.cpp b/src/overlaybd/tar/tar_file.cpp index 57552ead..d0cb26ff 100644 --- a/src/overlaybd/tar/tar_file.cpp +++ b/src/overlaybd/tar/tar_file.cpp @@ -309,7 +309,7 @@ int is_tar_file(IFile *file) { TarHeader th_buf; auto ret = file->pread(&th_buf, T_BLOCKSIZE, 0); if (ret < 0) { - LOG_ERROR_RETURN(0, -1, "read tar file header failed"); + LOG_ERRNO_RETURN(0, -1, "read tar file header failed"); } else if (ret != T_BLOCKSIZE) { LOG_WARN("read tar file header error, expect `, ret `", T_BLOCKSIZE, ret); return 0; diff --git a/src/overlaybd/zfile/test/test.cpp b/src/overlaybd/zfile/test/test.cpp index 4ee05a22..a506814b 100644 --- a/src/overlaybd/zfile/test/test.cpp +++ b/src/overlaybd/zfile/test/test.cpp @@ -30,7 +30,6 @@ #include "../compressor.cpp" #include -#include #include #include #include From 6e54f166bf4371688f722f7b6b4639f2efb89765 Mon Sep 17 00:00:00 2001 From: liulanzheng Date: Wed, 22 Nov 2023 15:01:46 +0800 Subject: [PATCH 28/31] update photon to v0.6.12 Signed-off-by: liulanzheng --- CMake/Findphoton.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMake/Findphoton.cmake b/CMake/Findphoton.cmake index 4c123220..ea946255 100644 --- a/CMake/Findphoton.cmake +++ b/CMake/Findphoton.cmake @@ -4,7 +4,7 @@ set(FETCHCONTENT_QUIET false) FetchContent_Declare( photon GIT_REPOSITORY https://github.com/alibaba/PhotonLibOS.git - GIT_TAG v0.6.11 + GIT_TAG v0.6.12 ) if(BUILD_TESTING) From 09f4b73f94ca3299fd7e046826d06a1561499fdb Mon Sep 17 00:00:00 2001 From: liulanzheng Date: Wed, 29 Nov 
2023 16:28:31 +0800 Subject: [PATCH 29/31] update photon to v0.6.13 Signed-off-by: liulanzheng --- CMake/Findphoton.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMake/Findphoton.cmake b/CMake/Findphoton.cmake index ea946255..87ed5730 100644 --- a/CMake/Findphoton.cmake +++ b/CMake/Findphoton.cmake @@ -4,7 +4,7 @@ set(FETCHCONTENT_QUIET false) FetchContent_Declare( photon GIT_REPOSITORY https://github.com/alibaba/PhotonLibOS.git - GIT_TAG v0.6.12 + GIT_TAG v0.6.13 ) if(BUILD_TESTING) From 8173faf9f92260502ccb3efd88d3327213319093 Mon Sep 17 00:00:00 2001 From: "zhuangbowei.zbw" Date: Wed, 29 Nov 2023 16:31:07 +0800 Subject: [PATCH 30/31] [bugfix] registry v2 segfault Signed-off-by: zhuangbowei.zbw --- src/overlaybd/registryfs/registryfs_v2.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/overlaybd/registryfs/registryfs_v2.cpp b/src/overlaybd/registryfs/registryfs_v2.cpp index 82baaf22..6726fa05 100644 --- a/src/overlaybd/registryfs/registryfs_v2.cpp +++ b/src/overlaybd/registryfs/registryfs_v2.cpp @@ -208,8 +208,10 @@ class RegistryFSImpl_v2 : public RegistryFS { HTTP_OP op(m_client, Verb::GET, url); op.follow = 0; op.retry = 0; - op.req.headers.insert(kAuthHeaderKey, "Bearer "); - op.req.headers.value_append(*token); + if (token && !token->empty()) { + op.req.headers.insert(kAuthHeaderKey, "Bearer "); + op.req.headers.value_append(*token); + } op.timeout = tmo.timeout(); op.call(); code = op.status_code; From 69f9d1dfd5acabe4cf907e22df893ea0e8e27e2d Mon Sep 17 00:00:00 2001 From: "zhuangbowei.zbw" Date: Wed, 29 Nov 2023 16:48:18 +0800 Subject: [PATCH 31/31] [bugfix] fix gzip cache Signed-off-by: zhuangbowei.zbw --- src/overlaybd/cache/gzip_cache/cached_fs.cpp | 7 ++- src/overlaybd/gzindex/test/test.cpp | 65 +++++++------------- 2 files changed, 29 insertions(+), 43 deletions(-) diff --git a/src/overlaybd/cache/gzip_cache/cached_fs.cpp b/src/overlaybd/cache/gzip_cache/cached_fs.cpp index 07181c3f..d28de2ae 
100644 --- a/src/overlaybd/cache/gzip_cache/cached_fs.cpp +++ b/src/overlaybd/cache/gzip_cache/cached_fs.cpp @@ -16,6 +16,7 @@ #include "cached_fs.h" #include "../full_file_cache/cache_pool.h" #include "../cache.h" +#include namespace Cache { @@ -34,7 +35,11 @@ class GzipCachedFsImpl : public GzipCachedFs { if (!file) { LOG_ERRNO_RETURN(0, nullptr, "Open source gzfile failed"); } - auto cache_store = pool_->open(file_name, O_RDWR | O_CREAT, 0644); + estring fn = file_name; + if (fn[0] != '/') { + fn = estring().appends("/", fn); + } + auto cache_store = pool_->open(fn, O_RDWR | O_CREAT, 0644); if (cache_store == nullptr) { delete file; LOG_ERRNO_RETURN(0, nullptr, "file cache pool open file failed, name : `", file_name); diff --git a/src/overlaybd/gzindex/test/test.cpp b/src/overlaybd/gzindex/test/test.cpp index 887e045e..1c4fa18e 100644 --- a/src/overlaybd/gzindex/test/test.cpp +++ b/src/overlaybd/gzindex/test/test.cpp @@ -15,8 +15,7 @@ */ #include "../gzfile.h" -#include "../../cache/pool_store.h" -#include "../../cache/full_file_cache/cache_pool.h" +#include "../../cache/gzip_cache/cached_fs.h" #include "../../cache/cache.h" #include #include @@ -251,13 +250,11 @@ class GzCacheTest : public ::testing::Test { system(cmd.c_str()); cmd = std::string("mkdir -p /tmp/gzip_src"); system(cmd.c_str()); - cmd = std::string("mkdir -p /tmp/gzip_cache"); + cmd = std::string("mkdir -p /tmp/gzip_cache_compress"); + system(cmd.c_str()); + cmd = std::string("mkdir -p /tmp/gzip_cache_decompress"); system(cmd.c_str()); lfs = photon::fs::new_localfs_adaptor("/tmp/gzip_src"); - cfs = photon::fs::new_localfs_adaptor("/tmp/gzip_cache"); - cfs1 = photon::fs::new_localfs_adaptor("/tmp/gzip_cache"); - pool = new Cache::FileCachePool(cfs1, 4, 10000000, (uint64_t)1048576 * 4096, 1024 * 1024); - if (buildDataFile() != 0) { LOG_ERROR("failed to build ` and `", fn_defile, fn_gzdata); exit(-1); @@ -266,34 +263,27 @@ class GzCacheTest : public ::testing::Test { LOG_ERROR("failed to build gz 
index: `", fn_gzindex); exit(-1); } + + auto mediafs = photon::fs::new_localfs_adaptor("/tmp/gzip_cache_compress"); lfs = FileSystem::new_full_file_cached_fs( - lfs, cfs, 1024 * 1024, 1, 10000000, + lfs, mediafs, 1024 * 1024, 1, 10000000, (uint64_t)1048576 * 4096, nullptr, 0, nullptr); - gzdata = lfs->open(fn_gzdata, O_CREAT | O_TRUNC | O_RDWR, 0644); + delete gzdata; + gzdata = lfs->open(fn_gzdata, O_RDONLY, 0644); if (gzdata == nullptr) { LOG_ERROR("gzdata create failed"); exit(-1); } gzfile = new_gzfile(gzdata, gzindex); - struct stat st = {}; - if (gzfile) { - auto ok = gzfile->fstat(&st); - if (ok == -1) { - exit(-1); - } - } - auto store = pool->open(fn_defile, O_RDWR | O_CREAT, 0644); - if (store == nullptr) { - delete gzfile; + if (gzfile == nullptr) { + LOG_ERROR("gzfile create failed"); exit(-1); } - auto io_alloc = new IOAlloc; - store->set_src_file(gzfile); - store->set_page_size(4096); - store->set_allocator(io_alloc); - store->set_actual_size(st.st_size); - gzfile = new_cached_file(store, 4096, nullptr); + + mediafs = photon::fs::new_localfs_adaptor("/tmp/gzip_cache_decompress"); + cfs = Cache::new_gzip_cached_fs(mediafs, 1024 * 1024, 4, 10000000, (uint64_t)1048576 * 4096, nullptr); + gzfile = cfs->open_cached_gzip_file(gzfile, fn_defile); if (gzfile == nullptr) { LOG_ERROR("failed create new cached gzip file"); exit(-1); @@ -306,17 +296,12 @@ class GzCacheTest : public ::testing::Test { delete defile; delete gzfile; - if (lfs->access(fn_defile, 0) == 0) { - lfs->unlink(fn_defile); - } - if (lfs->access(fn_gzdata, 0) == 0) { - lfs->unlink(fn_gzdata); - } - if (lfs->access(fn_gzindex, 0) == 0) { - lfs->unlink(fn_gzindex); - } - delete pool; + lfs->unlink(fn_defile); + lfs->unlink(fn_gzdata); + lfs->unlink(fn_gzindex); + delete lfs; + delete cfs; } void test_pread(PreadTestCase t) { @@ -343,9 +328,7 @@ class GzCacheTest : public ::testing::Test { } private: static photon::fs::IFileSystem *lfs; - static photon::fs::IFileSystem *cfs; - static 
photon::fs::IFileSystem *cfs1; - static FileSystem::ICachePool *pool; + static Cache::GzipCachedFs *cfs; static photon::fs::IFile *gzdata; static photon::fs::IFile *gzindex; @@ -419,9 +402,7 @@ class GzCacheTest : public ::testing::Test { }; photon::fs::IFileSystem *GzCacheTest::lfs = nullptr; -photon::fs::IFileSystem *GzCacheTest::cfs = nullptr; -photon::fs::IFileSystem *GzCacheTest::cfs1 = nullptr; -FileSystem::ICachePool *GzCacheTest::pool = nullptr; +Cache::GzipCachedFs *GzCacheTest::cfs = nullptr; photon::fs::IFile *GzCacheTest::defile = nullptr; photon::fs::IFile *GzCacheTest::gzfile = nullptr; photon::fs::IFile *GzCacheTest::gzdata = nullptr; @@ -447,7 +428,7 @@ TEST_F(GzCacheTest, cache_store) { DEFER(delete []cbuf1); DEFER(delete []cbuf2); auto fp1 = fopen("/tmp/gzip_src/fdata", "r"); - auto fp2 = fopen("/tmp/gzip_cache/fdata", "r"); + auto fp2 = fopen("/tmp/gzip_cache_decompress/fdata", "r"); DEFER(fclose(fp1)); DEFER(fclose(fp2)); fread(cbuf1, 1, vsize, fp1);