From 406d3621175f726f3cac68d8cd1d148bd75698fb Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 20 Feb 2024 16:29:58 -0600 Subject: [PATCH 01/18] Add an object size cache to slice plugin --- plugins/slice/CMakeLists.txt | 1 + plugins/slice/cache.cc | 91 +++++++++++++++++++++++ plugins/slice/cache.h | 63 ++++++++++++++++ plugins/slice/unit-tests/CMakeLists.txt | 5 ++ plugins/slice/unit-tests/test_cache.cc | 97 +++++++++++++++++++++++++ 5 files changed, 257 insertions(+) create mode 100644 plugins/slice/cache.cc create mode 100644 plugins/slice/cache.h create mode 100644 plugins/slice/unit-tests/test_cache.cc diff --git a/plugins/slice/CMakeLists.txt b/plugins/slice/CMakeLists.txt index f63bd2efe4e..3c79739c14a 100644 --- a/plugins/slice/CMakeLists.txt +++ b/plugins/slice/CMakeLists.txt @@ -31,6 +31,7 @@ add_atsplugin( slice.cc transfer.cc util.cc + cache.cc ) target_link_libraries(slice PRIVATE PCRE::PCRE) diff --git a/plugins/slice/cache.cc b/plugins/slice/cache.cc new file mode 100644 index 00000000000..d353345a57b --- /dev/null +++ b/plugins/slice/cache.cc @@ -0,0 +1,91 @@ +/** @file cache.cc + + Metadata cache to store object sizes. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include "cache.h" +#include "ts/ts.h" +#include + +ObjectSizeCache::ObjectSizeCache(cache_size_type cache_size) + : _cache_size(cache_size), _urls(cache_size), _object_sizes(cache_size, 0), _visits(cache_size, false) +{ +} + +std::optional +ObjectSizeCache::get(const std::string_view url) +{ + if (auto it = _index.find(url); it != _index.end()) { + // Cache hit + cache_size_type i = it->second; + _visits[i] = true; + assert(url == _urls[i]); + return _object_sizes[i]; + } else { + // Cache miss + return std::nullopt; + } +} + +void +ObjectSizeCache::set(const std::string_view url, uint64_t object_size) +{ + cache_size_type i; + if (auto it = _index.find(url); it != _index.end()) { + // Already exists in cache. Overwrite. + i = it->second; + } else { + // Doesn't exist in cache. Evict something else. + find_eviction_slot(); + i = _hand; + _urls[i] = url; + _index[_urls[i]] = _hand; + _hand++; + if (_hand >= _cache_size) { + _hand = 0; + } + } + _object_sizes[i] = object_size; +} + +/** + * @brief Make _hand point to the next entry that should be replaced, and clear that entry if it exists. + * + */ +void +ObjectSizeCache::find_eviction_slot() +{ + while (_visits[_hand]) { + _visits[_hand] = false; + _hand++; + if (_hand >= _cache_size) { + _hand = 0; + } + } + + std::string_view evicted_url = _urls[_hand]; + if (!evicted_url.empty()) { + auto it = _index.find(evicted_url); + assert(it != _index.end()); + _index.erase(it); + _urls[_hand].erase(); + } +} diff --git a/plugins/slice/cache.h b/plugins/slice/cache.h new file mode 100644 index 00000000000..c55a2f6e860 --- /dev/null +++ b/plugins/slice/cache.h @@ -0,0 +1,63 @@ +/** @file cache.h + + Metadata cache to store object sizes. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include +#include +#include +#include +#include + +class ObjectSizeCache +{ +public: + using cache_size_type = size_t; + using object_size_type = uint64_t; + + ObjectSizeCache(cache_size_type cache_size); + + /** + * @brief Get an object size from cache. + * + * @param url The URL of the object + * @return std::optional If the object size was found, return the size of the object. If not, return std::nullopt. + */ + std::optional get(const std::string_view url); + + /** + * @brief Add an object size to cache. + * + * @param url The URL of the object + * @param object_size The size of the object + */ + void set(const std::string_view url, object_size_type object_size); + +private: + void find_eviction_slot(); + + cache_size_type _cache_size; + cache_size_type _hand{0}; + std::vector _urls; + std::vector _object_sizes; + std::vector _visits; + std::unordered_map _index; +}; diff --git a/plugins/slice/unit-tests/CMakeLists.txt b/plugins/slice/unit-tests/CMakeLists.txt index 9ddec81ee6f..7a5aeee69e8 100644 --- a/plugins/slice/unit-tests/CMakeLists.txt +++ b/plugins/slice/unit-tests/CMakeLists.txt @@ -29,3 +29,8 @@ add_executable(test_config test_config.cc ${PROJECT_SOURCE_DIR}/Config.cc) target_compile_definitions(test_config PRIVATE UNITTEST) target_link_libraries(test_config PRIVATE PCRE::PCRE catch2::catch2 ts::tsutil) add_test(NAME test_config COMMAND test_config) + +add_executable(test_cache test_cache.cc ${PROJECT_SOURCE_DIR}/cache.cc) +target_compile_definitions(test_cache PRIVATE UNITTEST) +target_link_libraries(test_cache PRIVATE catch2::catch2 ts::tsutil) +add_test(NAME test_cache COMMAND test_cache) diff --git a/plugins/slice/unit-tests/test_cache.cc b/plugins/slice/unit-tests/test_cache.cc new file mode 100644 index 00000000000..76abd22c974 --- /dev/null +++ b/plugins/slice/unit-tests/test_cache.cc @@ -0,0 +1,97 @@ +/** @file test_cache.cc + + Unit tests for metadata cache + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include +#include +#include +#define CATCH_CONFIG_MAIN /* include main function */ +#include "catch.hpp" /* catch unit-test framework */ +#include "../cache.h" + +using namespace std::string_view_literals; +TEST_CASE("cache miss", "[slice][metadatacache]") +{ + ObjectSizeCache cache{1024}; + std::optional res = cache.get("example.com"sv); + CHECK(res == std::nullopt); +} + +TEST_CASE("cache hit", "[slice][metadatacache]") +{ + ObjectSizeCache cache{1024}; + cache.set("example.com/123"sv, 123); + std::optional res2 = cache.get("example.com/123"sv); + CHECK(res2.value() == 123); +} + +TEST_CASE("eviction", "[slice][metadatacache]") +{ + constexpr int cache_size = 10; + ObjectSizeCache cache{cache_size}; + for (uint64_t i = 0; i < cache_size * 100; i++) { + std::stringstream ss; + ss << "http://example.com/" << i; + cache.set(ss.str(), i); + } + size_t found = 0; + for (uint64_t i = 0; i < cache_size * 100; i++) { + std::stringstream ss; + ss << "http://example.com/" << i; + std::optional size = cache.get(ss.str()); + if (size.has_value()) { + CHECK(size.value() == i); + found++; + } + } + REQUIRE(found == cache_size); +} + +TEST_CASE("hit rate", "[slice][metadatacache]") +{ + constexpr int cache_size = 10; + ObjectSizeCache cache{cache_size}; + + size_t hits = 0; + size_t misses = 0; + std::mt19937 gen; + std::poisson_distribution d{cache_size}; + + for (uint64_t i = 0; i < cache_size * 100; i++) { + std::stringstream ss; + uint64_t obj = d(gen); + + ss << "http://example.com/" << obj; + std::optional size = cache.get(ss.str()); + if (size.has_value()) { + CHECK(size.value() == obj); + hits++; + } else { + cache.set(ss.str(), obj); + misses++; + } + } + + INFO("Hits: " << hits); + INFO("Misses: " << misses); + REQUIRE(hits > cache_size * 50); +} From 1f95d33d743b7bd3bcbfed4d163a3a8262337e5f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 27 Feb 2024 18:37:01 -0600 Subject: [PATCH 02/18] Conditional Slicing Add an option to slice plugin to only slice large objects. --- plugins/slice/Config.cc | 72 ++++++++++++- plugins/slice/Config.h | 14 ++- plugins/slice/cache.cc | 19 +++- plugins/slice/cache.h | 11 ++ plugins/slice/server.cc | 22 ++++ plugins/slice/slice.cc | 131 +++++++++++++++++++----- plugins/slice/unit-tests/CMakeLists.txt | 2 +- plugins/slice/unit-tests/test_cache.cc | 65 +++++++++++- 8 files changed, 299 insertions(+), 37 deletions(-) diff --git a/plugins/slice/Config.cc b/plugins/slice/Config.cc index 3a7ef883f70..625db50c992 100644 --- a/plugins/slice/Config.cc +++ b/plugins/slice/Config.cc @@ -18,6 +18,7 @@ #include "Config.h" +#include #include #include #include @@ -123,13 +124,15 @@ Config::fromArgs(int const argc, char const *const argv[]) {const_cast("blockbytes-test"), required_argument, nullptr, 't'}, {const_cast("prefetch-count"), required_argument, nullptr, 'f'}, {const_cast("strip-range-for-head"), no_argument, nullptr, 'h'}, + {const_cast("minimum-size"), required_argument, nullptr, 'm'}, + {const_cast("metadata-cache-size"), required_argument, nullptr, 'z'}, {nullptr, 0, nullptr, 0 }, }; // getopt assumes args start at '1' so this hack is needed char *const *argvp = (const_cast(argv) - 1); for (;;) { - int const opt = getopt_long(argc + 1, argvp, "b:dc:e:i:lp:r:s:t:", longopts, nullptr); + int const opt = getopt_long(argc + 1, argvp, "b:dc:e:i:lm:p:r:s:t:z:", longopts, nullptr); if (-1 == opt) { break; } @@ -228,6 +231,25 @@ Config::fromArgs(int const argc, char const *const argv[]) case 'h': { m_head_strip_range = true; } break; + case 'm': { + int64_t const bytesread = bytesFrom(optarg); + if (bytesread < 0) { + DEBUG_LOG("Invalid minimum-size: %s", optarg); + } + m_min_size_to_slice = bytesread; + DEBUG_LOG("Only slicing objects %" PRIu64 " bytes or larger", m_min_size_to_slice); + } break; + case 'z': { + try { + size_t size = std::stoul(optarg); + setCacheSize(size); + DEBUG_LOG("Metadata cache size: %zu entries", size); + } catch (std::invalid_argument e) { + ERROR_LOG("Invalid metadata cache size argument: %s", optarg); + } catch (std::out_of_range e) { + ERROR_LOG("Metadata cache size out of range: %s", optarg); + } + } break; default: break; } @@ -256,6 +278,15 @@ Config::fromArgs(int const argc, char const *const argv[]) DEBUG_LOG("Using default slice skip header %s", m_skip_header.c_str()); } + if (m_min_size_to_slice > 0) { + if (m_oscache.has_value()) { + DEBUG_LOG("Metadata cache size: %zu", m_oscache->cache_size()); + } else { + ERROR_LOG("An metadata cache is required when --minimum-size is specified! Using a default size of 16384 entries."); + setCacheSize(16384); + } + } + return true; } @@ -309,3 +340,42 @@ Config::matchesRegex(char const *const url, int const urllen) const return matches; } + +void +Config::setCacheSize(size_t entries) +{ + if (entries == 0) { + m_oscache.reset(); + } else { + m_oscache.emplace(entries); + } +} + +bool +Config::is_known_large_obj(std::string_view url) +{ + if (m_min_size_to_slice <= 0) { + // If conditional slicing is not set, all objects are large enough to slice + return true; + } + + assert(m_oscache.has_value()); // object size cache is always present when conditionally slicing + std::optional size = m_oscache->get(url); + if (size.has_value()) { + DEBUG_LOG("Found url in cache: %.*s -> %" PRIu64, static_cast(url.size()), url.data(), size.value()); + if (size.value() >= m_min_size_to_slice) { + return true; + } + } + + return false; +} + +void +Config::size_cache_add(std::string_view url, uint64_t size) +{ + if (m_oscache) { + DEBUG_LOG("Adding url to cache: %.*s -> %" PRIu64, static_cast(url.size()), url.data(), size); + m_oscache->set(url, size); + } +} diff --git a/plugins/slice/Config.h b/plugins/slice/Config.h index c1e630447ad..e303b2ab8e4 100644 --- a/plugins/slice/Config.h +++ b/plugins/slice/Config.h @@ -19,6 +19,7 @@ #pragma once #include "slice.h" +#include "cache.h" #ifdef HAVE_PCRE_PCRE_H #include @@ -45,8 +46,9 @@ struct Config { int m_paceerrsecs{0}; // -1 disable logging, 0 no pacing, max 60s int m_prefetchcount{0}; // 0 disables prefetching enum RefType { First, Relative }; - RefType m_reftype{First}; // reference slice is relative to request - bool m_head_strip_range{false}; // strip range header for head requests + RefType m_reftype{First}; // reference slice is relative to request + bool m_head_strip_range{false}; // strip range header for head requests + uint64_t m_min_size_to_slice{0}; // Only strip objects larger than this std::string m_skip_header; std::string m_crr_ims_header; @@ -73,7 +75,15 @@ struct Config { // If no null reg, true, otherwise check against regex bool matchesRegex(char const *const url, int const urllen) const; + // Add an object size to cache + void size_cache_add(std::string_view url, uint64_t size); + + // Did we cache this internally as a small object? + bool is_known_large_obj(std::string_view url); + private: TSHRTime m_nextlogtime{0}; // next time to log in ns std::mutex m_mutex; + std::optional m_oscache; + void setCacheSize(size_t entries); }; diff --git a/plugins/slice/cache.cc b/plugins/slice/cache.cc index d353345a57b..c82ac932505 100644 --- a/plugins/slice/cache.cc +++ b/plugins/slice/cache.cc @@ -22,7 +22,6 @@ */ #include "cache.h" -#include "ts/ts.h" #include ObjectSizeCache::ObjectSizeCache(cache_size_type cache_size) @@ -33,14 +32,17 @@ ObjectSizeCache::ObjectSizeCache(cache_size_type cache_size) std::optional ObjectSizeCache::get(const std::string_view url) { + std::lock_guard lock{_mutex}; if (auto it = _index.find(url); it != _index.end()) { // Cache hit + _cache_hits++; cache_size_type i = it->second; _visits[i] = true; assert(url == _urls[i]); return _object_sizes[i]; } else { // Cache miss + _cache_misses++; return std::nullopt; } } @@ -48,12 +50,15 @@ ObjectSizeCache::get(const std::string_view url) void ObjectSizeCache::set(const std::string_view url, uint64_t object_size) { + std::lock_guard lock{_mutex}; cache_size_type i; if (auto it = _index.find(url); it != _index.end()) { // Already exists in cache. Overwrite. + _cache_write_hits++; i = it->second; } else { // Doesn't exist in cache. Evict something else. + _cache_write_misses++; find_eviction_slot(); i = _hand; _urls[i] = url; @@ -89,3 +94,15 @@ ObjectSizeCache::find_eviction_slot() _urls[_hand].erase(); } } +std::tuple +ObjectSizeCache::cache_stats() +{ + std::lock_guard lock{_mutex}; + return {_cache_hits, _cache_misses, _cache_write_hits, _cache_write_misses}; +} + +ObjectSizeCache::cache_size_type +ObjectSizeCache::cache_size() +{ + return _cache_size; +} diff --git a/plugins/slice/cache.h b/plugins/slice/cache.h index c55a2f6e860..fcd53a36e41 100644 --- a/plugins/slice/cache.h +++ b/plugins/slice/cache.h @@ -26,6 +26,7 @@ #include #include #include +#include class ObjectSizeCache { @@ -51,6 +52,10 @@ class ObjectSizeCache */ void set(const std::string_view url, object_size_type object_size); + cache_size_type cache_size(); + + std::tuple cache_stats(); + private: void find_eviction_slot(); @@ -60,4 +65,10 @@ class ObjectSizeCache std::vector _object_sizes; std::vector _visits; std::unordered_map _index; + std::mutex _mutex; + + uint64_t _cache_hits = 0; + uint64_t _cache_misses = 0; + uint64_t _cache_write_hits = 0; + uint64_t _cache_write_misses = 0; }; diff --git a/plugins/slice/server.cc b/plugins/slice/server.cc index 2770d2a85d5..760bf4837f4 100644 --- a/plugins/slice/server.cc +++ b/plugins/slice/server.cc @@ -23,6 +23,7 @@ #include "HttpHeader.h" #include "response.h" #include "transfer.h" +#include "ts/apidefs.h" #include "util.h" #include @@ -87,6 +88,24 @@ enum HeaderState { Passthru, }; +static void +update_object_size(TSHttpTxn txnp, int64_t size, Config &config) +{ + int urllen = 0; + char *urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &urllen); + if (urlstr != nullptr) { + if (size <= 0) { + DEBUG_LOG("Ignoring invalid content length for %.*s: %lld", urllen, urlstr, size); + return; + } + + config.size_cache_add({urlstr, static_cast(urllen)}, static_cast(size)); + TSfree(urlstr); + } else { + ERROR_LOG("Could not get URL from transaction."); + } +} + HeaderState handleFirstServerHeader(Data *const data, TSCont const contp) { @@ -121,6 +140,7 @@ handleFirstServerHeader(Data *const data, TSCont const contp) } DEBUG_LOG("Passthru bytes: header: %" PRId64 " body: %" PRId64, hlen, clen); if (clen != INT64_MAX) { + update_object_size(data->m_txnp, clen, *data->m_config); TSVIONBytesSet(output_vio, hlen + clen); } else { TSVIONBytesSet(output_vio, clen); @@ -140,6 +160,8 @@ handleFirstServerHeader(Data *const data, TSCont const contp) return HeaderState::Fail; } + update_object_size(data->m_txnp, blockcr.m_length, *data->m_config); + // set the resource content length from block response data->m_contentlen = blockcr.m_length; diff --git a/plugins/slice/slice.cc b/plugins/slice/slice.cc index 0b62658262a..3ba8194776a 100644 --- a/plugins/slice/slice.cc +++ b/plugins/slice/slice.cc @@ -23,31 +23,55 @@ #include "HttpHeader.h" #include "intercept.h" +#include "ts/apidefs.h" #include "ts/remap.h" #include "ts/ts.h" +#include #include -#include namespace { +struct PluginInfo { + Config config; + TSCont read_obj_size_contp; +}; + Config globalConfig; +TSCont global_read_obj_size_contp; + +static bool +should_skip_this_obj(TSHttpTxn txnp, Config *const config) +{ + int len = 0; + char *const urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &len); + + if (!config->is_known_large_obj({urlstr, static_cast(len)})) { + DEBUG_LOG("Not a known large object, not slicing: %.*s", len, urlstr); + return true; + } + + return false; +} bool -read_request(TSHttpTxn txnp, Config *const config) +read_request(TSHttpTxn txnp, Config *const config, TSCont read_obj_size_contp) { DEBUG_LOG("slice read_request"); TxnHdrMgr hdrmgr; hdrmgr.populateFrom(txnp, TSHttpTxnClientReqGet); HttpHeader const header(hdrmgr.m_buffer, hdrmgr.m_lochdr); + bool ret = false; + bool should_read_obj_size = false; + if (TS_HTTP_METHOD_GET == header.method() || TS_HTTP_METHOD_HEAD == header.method() || TS_HTTP_METHOD_PURGE == header.method()) { if (!header.hasKey(config->m_skip_header.data(), config->m_skip_header.size())) { // check if any previous plugin has monkeyed with the transaction status TSHttpStatus const txnstat = TSHttpTxnStatusGet(txnp); if (TS_HTTP_STATUS_NONE != txnstat) { DEBUG_LOG("txn status change detected (%d), skipping plugin\n", static_cast(txnstat)); - return false; + goto out; } if (config->hasRegex()) { @@ -58,7 +82,7 @@ read_request(TSHttpTxn txnp, Config *const config) if (!shouldslice) { DEBUG_LOG("request failed regex, not slicing: '%.*s'", urllen, urlstr); TSfree(urlstr); - return false; + goto out; } DEBUG_LOG("request passed regex, slicing: '%.*s'", urllen, urlstr); @@ -75,11 +99,11 @@ read_request(TSHttpTxn txnp, Config *const config) // connection back into ATS sockaddr const *const ip = TSHttpTxnClientAddrGet(txnp); if (nullptr == ip) { - return false; + goto out; } TSAssert(nullptr != config); - Data *const data = new Data(config); + std::unique_ptr data = std::make_unique(config); data->m_method_type = header.method(); data->m_txnp = txnp; @@ -90,16 +114,20 @@ read_request(TSHttpTxn txnp, Config *const config) } else if (AF_INET6 == ip->sa_family) { memcpy(&data->m_client_ip, ip, sizeof(sockaddr_in6)); } else { - delete data; - return false; + goto out; } // need to reset the HOST field for global plugin data->m_hostlen = sizeof(data->m_hostname) - 1; if (!header.valueForKey(TS_MIME_FIELD_HOST, TS_MIME_LEN_HOST, data->m_hostname, &data->m_hostlen)) { DEBUG_LOG("Unable to get hostname from header"); - delete data; - return false; + goto out; + } + + // check if object is previously known to be too small to slice + if (should_skip_this_obj(txnp, config)) { + should_read_obj_size = true; + goto out; } // is the plugin configured to use a remap host? @@ -118,8 +146,7 @@ read_request(TSHttpTxn txnp, Config *const config) if (TS_SUCCESS != rcode) { ERROR_LOG("Error cloning pristine url"); TSMBufferDestroy(newbuf); - delete data; - return false; + goto out; } data->m_urlbuf = newbuf; @@ -152,8 +179,7 @@ read_request(TSHttpTxn txnp, Config *const config) TSHandleMLocRelease(newbuf, nullptr, newloc); } TSMBufferDestroy(newbuf); - delete data; - return false; + goto out; } data->m_urlbuf = newbuf; @@ -173,8 +199,8 @@ read_request(TSHttpTxn txnp, Config *const config) // we'll intercept this GET and do it ourselves TSMutex const mutex = TSContMutexGet(reinterpret_cast(txnp)); - TSCont const icontp(TSContCreate(intercept_hook, mutex)); - TSContDataSet(icontp, (void *)data); + TSCont const icontp(TSContCreate(intercept_hook, mutex)); + TSContDataSet(icontp, data.release()); // Skip remap and remap rule requirement TSHttpTxnCntlSet(txnp, TS_HTTP_CNTL_SKIP_REMAPPING, true); @@ -182,13 +208,53 @@ read_request(TSHttpTxn txnp, Config *const config) // Grab the transaction TSHttpTxnIntercept(icontp, txnp); - return true; + ret = true; } else { DEBUG_LOG("slice passing GET or HEAD request through to next plugin"); } } - return false; +out: + if (should_read_obj_size) { + TSHttpTxnHookAdd(txnp, TS_HTTP_READ_RESPONSE_HDR_HOOK, read_obj_size_contp); + } + return ret; +} + +static int +read_obj_size(TSCont contp, TSEvent event, void *edata) +{ + TSHttpTxn txnp = static_cast(edata); + PluginInfo *info = static_cast(TSContDataGet(contp)); + + int urllen = 0; + char *urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &urllen); + if (urlstr != nullptr) { + { + TxnHdrMgr response; + response.populateFrom(txnp, TSHttpTxnServerRespGet); + HttpHeader const resp_header(response.m_buffer, response.m_lochdr); + char constr[1024]; + int conlen; + bool const hasContentLength( + resp_header.valueForKey(TS_MIME_FIELD_CONTENT_LENGTH, TS_MIME_LEN_CONTENT_LENGTH, constr, &conlen)); + if (hasContentLength) { + uint64_t content_length; + + [[maybe_unused]] auto [ptr, ec] = std::from_chars(constr, constr + conlen, content_length); + if (ec == std::errc()) { + info->config.size_cache_add({urlstr, static_cast(urllen)}, content_length); + } else { + ERROR_LOG("Could not parse content-length: %.*s", conlen, constr); + } + } + } + TSfree(urlstr); + } + + // Reenable and continue with the state machine. + TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE); + return 0; } int @@ -199,7 +265,7 @@ global_read_request_hook(TSCont // contp void *edata) { TSHttpTxn const txnp = static_cast(edata); - read_request(txnp, &globalConfig); + read_request(txnp, &globalConfig, global_read_obj_size_contp); TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE); return 0; } @@ -216,9 +282,9 @@ DbgCtl dbg_ctl{PLUGIN_NAME}; TSRemapStatus TSRemapDoRemap(void *ih, TSHttpTxn txnp, TSRemapRequestInfo * /* rri ATS_UNUSED */) { - Config *const config = static_cast(ih); + PluginInfo *const info = static_cast(ih); - if (read_request(txnp, config)) { + if (read_request(txnp, &info->config, info->read_obj_size_contp)) { return TSREMAP_DID_REMAP_STOP; } else { return TSREMAP_NO_REMAP; @@ -234,9 +300,16 @@ TSRemapOSResponse(void * /* ih ATS_UNUSED */, TSHttpTxn /* rh ATS_UNUSED */, int TSReturnCode TSRemapNewInstance(int argc, char *argv[], void **ih, char * /* errbuf */, int /* errbuf_size */) { - Config *const config = new Config; - config->fromArgs(argc - 2, argv + 2); - *ih = static_cast(config); + PluginInfo *const info = new PluginInfo; + + info->config.fromArgs(argc - 2, argv + 2); + + TSCont read_obj_size_contp = TSContCreate(read_obj_size, nullptr); + TSContDataSet(read_obj_size_contp, static_cast(info)); + info->read_obj_size_contp = read_obj_size_contp; + + *ih = static_cast(info); + return TS_SUCCESS; } @@ -244,8 +317,9 @@ void TSRemapDeleteInstance(void *ih) { if (nullptr != ih) { - Config *const config = static_cast(ih); - delete config; + PluginInfo *const info = static_cast(ih); + TSContDestroy(info->read_obj_size_contp); + delete info; } } @@ -273,8 +347,9 @@ TSPluginInit(int argc, char const *argv[]) globalConfig.fromArgs(argc - 1, argv + 1); - TSCont const contp(TSContCreate(global_read_request_hook, nullptr)); + TSCont const global_read_request_contp(TSContCreate(global_read_request_hook, nullptr)); + global_read_obj_size_contp = TSContCreate(read_obj_size, nullptr); // Called immediately after the request header is read from the client - TSHttpHookAdd(TS_HTTP_READ_REQUEST_HDR_HOOK, contp); + TSHttpHookAdd(TS_HTTP_READ_REQUEST_HDR_HOOK, global_read_request_contp); } diff --git a/plugins/slice/unit-tests/CMakeLists.txt b/plugins/slice/unit-tests/CMakeLists.txt index 7a5aeee69e8..098372a3cce 100644 --- a/plugins/slice/unit-tests/CMakeLists.txt +++ b/plugins/slice/unit-tests/CMakeLists.txt @@ -25,7 +25,7 @@ target_compile_definitions(test_range PRIVATE UNITTEST) target_link_libraries(test_range PRIVATE catch2::catch2 ts::tsutil) add_test(NAME test_range COMMAND test_range) -add_executable(test_config test_config.cc ${PROJECT_SOURCE_DIR}/Config.cc) +add_executable(test_config test_config.cc ${PROJECT_SOURCE_DIR}/Config.cc ${PROJECT_SOURCE_DIR}/cache.cc) target_compile_definitions(test_config PRIVATE UNITTEST) target_link_libraries(test_config PRIVATE PCRE::PCRE catch2::catch2 ts::tsutil) add_test(NAME test_config COMMAND test_config) diff --git a/plugins/slice/unit-tests/test_cache.cc b/plugins/slice/unit-tests/test_cache.cc index 76abd22c974..e6bc8f892a4 100644 --- a/plugins/slice/unit-tests/test_cache.cc +++ b/plugins/slice/unit-tests/test_cache.cc @@ -24,6 +24,7 @@ #include #include #include +#include #define CATCH_CONFIG_MAIN /* include main function */ #include "catch.hpp" /* catch unit-test framework */ #include "../cache.h" @@ -66,13 +67,33 @@ TEST_CASE("eviction", "[slice][metadatacache]") REQUIRE(found == cache_size); } +TEST_CASE("tiny cache", "[slice][metadatacache]") +{ + constexpr int cache_size = 1; + ObjectSizeCache cache{cache_size}; + for (uint64_t i = 0; i < cache_size * 100; i++) { + std::stringstream ss; + ss << "http://example.com/" << i; + cache.set(ss.str(), i); + } + size_t found = 0; + for (uint64_t i = 0; i < cache_size * 100; i++) { + std::stringstream ss; + ss << "http://example.com/" << i; + std::optional size = cache.get(ss.str()); + if (size.has_value()) { + CHECK(size.value() == i); + found++; + } + } + REQUIRE(found == cache_size); +} + TEST_CASE("hit rate", "[slice][metadatacache]") { constexpr int cache_size = 10; ObjectSizeCache cache{cache_size}; - size_t hits = 0; - size_t misses = 0; std::mt19937 gen; std::poisson_distribution d{cache_size}; @@ -84,14 +105,50 @@ TEST_CASE("hit rate", "[slice][metadatacache]") std::optional size = cache.get(ss.str()); if (size.has_value()) { CHECK(size.value() == obj); - hits++; } else { cache.set(ss.str(), obj); - misses++; } } + auto [hits, misses, write_hits, write_misses] = cache.cache_stats(); INFO("Hits: " << hits); INFO("Misses: " << misses); REQUIRE(hits > cache_size * 50); } + +TEST_CASE("threads", "[slice][metadatacache]") +{ + constexpr int cache_size = 10; + ObjectSizeCache cache{cache_size}; + + std::mt19937 gen; + std::poisson_distribution d{cache_size}; + std::vector threads; + + auto runfunc = [&]() { + for (uint64_t i = 0; i < cache_size * 100; i++) { + std::stringstream ss; + uint64_t obj = d(gen); + + ss << "http://example.com/" << obj; + std::optional size = cache.get(ss.str()); + if (size.has_value()) { + CHECK(size.value() == obj); + } else { + cache.set(ss.str(), obj); + } + } + }; + + for (int i = 0; i < 4; i++) { + threads.emplace_back(runfunc); + } + + for (auto &t : threads) { + t.join(); + } + auto [hits, misses, write_hits, write_misses] = cache.cache_stats(); + INFO("Hits: " << hits); + INFO("Misses: " << misses); + REQUIRE(hits > cache_size * 50 * 4); +} From b7cde06f04724d624b6042bdb18acee26ad88aa6 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 1 Mar 2024 14:23:41 -0600 Subject: [PATCH 03/18] Metadata cache stats --- plugins/slice/Config.cc | 17 ++++++++++++++-- plugins/slice/Config.h | 9 +++++++-- plugins/slice/server.cc | 2 +- plugins/slice/slice.cc | 43 +++++++++++++++++++++++++++++++++++++++-- 4 files changed, 64 insertions(+), 7 deletions(-) diff --git a/plugins/slice/Config.cc b/plugins/slice/Config.cc index 625db50c992..25cfb5219a3 100644 --- a/plugins/slice/Config.cc +++ b/plugins/slice/Config.cc @@ -351,8 +351,21 @@ Config::setCacheSize(size_t entries) } } +void +Config::updateStats(const std::function &update_func) +{ + // TODO: check if this update is too frequent + if (stats_enabled) { + auto [cache_read_hits, cache_read_misses, cache_write_hits, cache_write_misses] = m_oscache->cache_stats(); + update_func(stat_read_hits_id, cache_read_hits); + update_func(stat_read_misses_id, cache_read_misses); + update_func(stat_write_hits_id, cache_write_hits); + update_func(stat_write_misses_id, cache_write_misses); + } +} + bool -Config::is_known_large_obj(std::string_view url) +Config::isKnownLargeObj(std::string_view url) { if (m_min_size_to_slice <= 0) { // If conditional slicing is not set, all objects are large enough to slice @@ -372,7 +385,7 @@ Config::is_known_large_obj(std::string_view url) } void -Config::size_cache_add(std::string_view url, uint64_t size) +Config::sizeCacheAdd(std::string_view url, uint64_t size) { if (m_oscache) { DEBUG_LOG("Adding url to cache: %.*s -> %" PRIu64, static_cast(url.size()), url.data(), size); diff --git a/plugins/slice/Config.h b/plugins/slice/Config.h index e303b2ab8e4..ca9de710595 100644 --- a/plugins/slice/Config.h +++ b/plugins/slice/Config.h @@ -76,10 +76,15 @@ struct Config { bool matchesRegex(char const *const url, int const urllen) const; // Add an object size to cache - void size_cache_add(std::string_view url, uint64_t size); + void sizeCacheAdd(std::string_view url, uint64_t size); // Did we cache this internally as a small object? - bool is_known_large_obj(std::string_view url); + bool isKnownLargeObj(std::string_view url); + + // Metadata cache stats + int stat_read_hits_id{-1}, stat_read_misses_id{-1}, stat_write_hits_id{-1}, stat_write_misses_id{-1}; + bool stats_enabled{false}; + void updateStats(const std::function &update_func); private: TSHRTime m_nextlogtime{0}; // next time to log in ns diff --git a/plugins/slice/server.cc b/plugins/slice/server.cc index 760bf4837f4..5e86418c38f 100644 --- a/plugins/slice/server.cc +++ b/plugins/slice/server.cc @@ -99,7 +99,7 @@ update_object_size(TSHttpTxn txnp, int64_t size, Config &config) return; } - config.size_cache_add({urlstr, static_cast(urllen)}, static_cast(size)); + config.sizeCacheAdd({urlstr, static_cast(urllen)}, static_cast(size)); TSfree(urlstr); } else { ERROR_LOG("Could not get URL from transaction."); diff --git a/plugins/slice/slice.cc b/plugins/slice/slice.cc index 3ba8194776a..56e81db4169 100644 --- a/plugins/slice/slice.cc +++ b/plugins/slice/slice.cc @@ -46,7 +46,9 @@ should_skip_this_obj(TSHttpTxn txnp, Config *const config) int len = 0; char *const urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &len); - if (!config->is_known_large_obj({urlstr, static_cast(len)})) { + config->updateStats([](int stat_id, uint64_t stat_value) { TSStatIntSet(stat_id, static_cast(stat_value)); }); + + if (!config->isKnownLargeObj({urlstr, static_cast(len)})) { DEBUG_LOG("Not a known large object, not slicing: %.*s", len, urlstr); return true; } @@ -243,7 +245,7 @@ read_obj_size(TSCont contp, TSEvent event, void *edata) [[maybe_unused]] auto [ptr, ec] = std::from_chars(constr, constr + conlen, content_length); if (ec == std::errc()) { - info->config.size_cache_add({urlstr, static_cast(urllen)}, content_length); + info->config.sizeCacheAdd({urlstr, static_cast(urllen)}, content_length); } else { ERROR_LOG("Could not parse content-length: %.*s", conlen, constr); } @@ -297,6 +299,38 @@ TSRemapOSResponse(void * /* ih ATS_UNUSED */, TSHttpTxn /* rh ATS_UNUSED */, int { } +static bool +register_stat(const char *name, int &id) +{ + if (TSStatFindName(name, &id) == TS_ERROR) { + id = TSStatCreate(name, TS_RECORDDATATYPE_INT, TS_STAT_NON_PERSISTENT, TS_STAT_SYNC_SUM); + if (id == TS_ERROR) { + ERROR_LOG("Failed to register stat '%s'", name); + return false; + } + } + + DEBUG_LOG("[%s] %s registered with id %d", PLUGIN_NAME, name, id); + + return true; +} + +static void +init_stats(Config &config) +{ + const std::array, 4> stats{ + {{PLUGIN_NAME ".metadata_cache.read.hits", config.stat_read_hits_id}, + {PLUGIN_NAME ".metadata_cache.read.misses", config.stat_read_misses_id}, + {PLUGIN_NAME ".metadata_cache.write.hits", config.stat_write_hits_id}, + {PLUGIN_NAME ".metadata_cache.write.misses", config.stat_write_misses_id}} + }; + + config.stats_enabled = true; + for (const auto &stat : stats) { + config.stats_enabled &= register_stat(stat.first, stat.second); + } +} + TSReturnCode TSRemapNewInstance(int argc, char *argv[], void **ih, char * /* errbuf */, int /* errbuf_size */) { @@ -308,6 +342,8 @@ TSRemapNewInstance(int argc, char *argv[], void **ih, char * /* errbuf */, int / TSContDataSet(read_obj_size_contp, static_cast(info)); info->read_obj_size_contp = read_obj_size_contp; + init_stats(info->config); + *ih = static_cast(info); return TS_SUCCESS; @@ -352,4 +388,7 @@ TSPluginInit(int argc, char const *argv[]) // Called immediately after the request header is read from the client TSHttpHookAdd(TS_HTTP_READ_REQUEST_HDR_HOOK, global_read_request_contp); + + // Register stats for metadata cache + init_stats(globalConfig); } From 6cdd5868be67e161d0d0f954c15b99154f3e3a47 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 1 Mar 2024 16:02:29 -0600 Subject: [PATCH 04/18] Try to fix build on 9.2.x --- plugins/slice/slice.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/slice/slice.cc b/plugins/slice/slice.cc index 56e81db4169..2aea6125f27 100644 --- a/plugins/slice/slice.cc +++ b/plugins/slice/slice.cc @@ -29,6 +29,8 @@ #include #include +#include +#include namespace { From d7b4f3ffd23dd53b62293fa5cdbdc19e9c3cfc15 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 1 Mar 2024 18:28:57 -0600 Subject: [PATCH 05/18] Add more debug prints to track obj size updates --- plugins/slice/slice.cc | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/plugins/slice/slice.cc b/plugins/slice/slice.cc index 2aea6125f27..aa1cd45a58c 100644 --- a/plugins/slice/slice.cc +++ b/plugins/slice/slice.cc @@ -234,26 +234,27 @@ read_obj_size(TSCont contp, TSEvent event, void *edata) int urllen = 0; char *urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &urllen); if (urlstr != nullptr) { - { - TxnHdrMgr response; - response.populateFrom(txnp, TSHttpTxnServerRespGet); - HttpHeader const resp_header(response.m_buffer, response.m_lochdr); - char constr[1024]; - int conlen; - bool const hasContentLength( - resp_header.valueForKey(TS_MIME_FIELD_CONTENT_LENGTH, TS_MIME_LEN_CONTENT_LENGTH, constr, &conlen)); - if (hasContentLength) { - uint64_t content_length; - - [[maybe_unused]] auto [ptr, ec] = std::from_chars(constr, constr + conlen, content_length); - if (ec == std::errc()) { - info->config.sizeCacheAdd({urlstr, static_cast(urllen)}, content_length); - } else { - ERROR_LOG("Could not parse content-length: %.*s", conlen, constr); - } + TxnHdrMgr response; + response.populateFrom(txnp, TSHttpTxnServerRespGet); + HttpHeader const resp_header(response.m_buffer, response.m_lochdr); + char constr[1024]; + int conlen; + bool const hasContentLength(resp_header.valueForKey(TS_MIME_FIELD_CONTENT_LENGTH, TS_MIME_LEN_CONTENT_LENGTH, constr, &conlen)); + if (hasContentLength) { + uint64_t content_length; + + [[maybe_unused]] auto [ptr, ec] = std::from_chars(constr, constr + conlen, content_length); + if (ec == std::errc()) { + info->config.sizeCacheAdd({urlstr, static_cast(urllen)}, content_length); + } else { + ERROR_LOG("Could not parse content-length: %.*s", conlen, constr); } + } else { + DEBUG_LOG("Could not get a content length for updating object size"); } TSfree(urlstr); + } else { + ERROR_LOG("Could not get URL for obj size."); } // Reenable and continue with the state machine. From d59c45a3aa731d5d810786198fb6668413bea93f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 1 Mar 2024 19:11:41 -0600 Subject: [PATCH 06/18] Fix an uninitialized length in read_obj_size --- plugins/slice/slice.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/slice/slice.cc b/plugins/slice/slice.cc index aa1cd45a58c..e364a238224 100644 --- a/plugins/slice/slice.cc +++ b/plugins/slice/slice.cc @@ -238,7 +238,7 @@ read_obj_size(TSCont contp, TSEvent event, void *edata) response.populateFrom(txnp, TSHttpTxnServerRespGet); HttpHeader const resp_header(response.m_buffer, response.m_lochdr); char constr[1024]; - int conlen; + int conlen = sizeof constr; bool const hasContentLength(resp_header.valueForKey(TS_MIME_FIELD_CONTENT_LENGTH, TS_MIME_LEN_CONTENT_LENGTH, constr, &conlen)); if (hasContentLength) { uint64_t content_length; From e54a885106fcca0532e2c602af392b98cf71d759 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 5 Mar 2024 16:08:53 -0600 Subject: [PATCH 07/18] Cleanups * Simplify read_request logic. * Make an error message better. --- plugins/slice/Config.cc | 2 +- plugins/slice/slice.cc | 29 +++++++++++------------------ 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/plugins/slice/Config.cc b/plugins/slice/Config.cc index 25cfb5219a3..8cedc56eb85 100644 --- a/plugins/slice/Config.cc +++ b/plugins/slice/Config.cc @@ -282,7 +282,7 @@ Config::fromArgs(int const argc, char const *const argv[]) if (m_oscache.has_value()) { DEBUG_LOG("Metadata cache size: %zu", m_oscache->cache_size()); } else { - ERROR_LOG("An metadata cache is required when --minimum-size is specified! Using a default size of 16384 entries."); + ERROR_LOG("--metadata-cache-size is required when --minimum-size is specified! Using a default size of 16384."); setCacheSize(16384); } } diff --git a/plugins/slice/slice.cc b/plugins/slice/slice.cc index e364a238224..4fad1c62095 100644 --- a/plugins/slice/slice.cc +++ b/plugins/slice/slice.cc @@ -66,16 +66,13 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_obj_size_contp) hdrmgr.populateFrom(txnp, TSHttpTxnClientReqGet); HttpHeader const header(hdrmgr.m_buffer, hdrmgr.m_lochdr); - bool ret = false; - bool should_read_obj_size = false; - if (TS_HTTP_METHOD_GET == header.method() || TS_HTTP_METHOD_HEAD == header.method() || TS_HTTP_METHOD_PURGE == header.method()) { if (!header.hasKey(config->m_skip_header.data(), config->m_skip_header.size())) { // check if any previous plugin has monkeyed with the transaction status TSHttpStatus const txnstat = TSHttpTxnStatusGet(txnp); if (TS_HTTP_STATUS_NONE != txnstat) { DEBUG_LOG("txn status change detected (%d), skipping plugin\n", static_cast(txnstat)); - goto out; + return false; } if (config->hasRegex()) { @@ -86,7 +83,7 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_obj_size_contp) if (!shouldslice) { DEBUG_LOG("request failed regex, not slicing: '%.*s'", urllen, urlstr); TSfree(urlstr); - goto out; + return false; } DEBUG_LOG("request passed regex, slicing: '%.*s'", urllen, urlstr); @@ -103,7 +100,7 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_obj_size_contp) // connection back into ATS sockaddr const *const ip = TSHttpTxnClientAddrGet(txnp); if (nullptr == ip) { - goto out; + return false; } TSAssert(nullptr != config); @@ -118,20 +115,20 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_obj_size_contp) } else if (AF_INET6 == ip->sa_family) { memcpy(&data->m_client_ip, ip, sizeof(sockaddr_in6)); } else { - goto out; + return false; } // need to reset the HOST field for global plugin data->m_hostlen = sizeof(data->m_hostname) - 1; if (!header.valueForKey(TS_MIME_FIELD_HOST, TS_MIME_LEN_HOST, data->m_hostname, &data->m_hostlen)) { DEBUG_LOG("Unable to get hostname from header"); - goto out; + return false; } // check if object is previously known to be too small to slice if (should_skip_this_obj(txnp, config)) { - should_read_obj_size = true; - goto out; + TSHttpTxnHookAdd(txnp, TS_HTTP_READ_RESPONSE_HDR_HOOK, read_obj_size_contp); + return false; } // is the plugin configured to use a remap host? @@ -150,7 +147,7 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_obj_size_contp) if (TS_SUCCESS != rcode) { ERROR_LOG("Error cloning pristine url"); TSMBufferDestroy(newbuf); - goto out; + return false; } data->m_urlbuf = newbuf; @@ -183,7 +180,7 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_obj_size_contp) TSHandleMLocRelease(newbuf, nullptr, newloc); } TSMBufferDestroy(newbuf); - goto out; + return false; } data->m_urlbuf = newbuf; @@ -212,17 +209,13 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_obj_size_contp) // Grab the transaction TSHttpTxnIntercept(icontp, txnp); - ret = true; + return true; } else { DEBUG_LOG("slice passing GET or HEAD request through to next plugin"); } } -out: - if (should_read_obj_size) { - TSHttpTxnHookAdd(txnp, TS_HTTP_READ_RESPONSE_HDR_HOOK, read_obj_size_contp); - } - return ret; + return false; } static int From 66a98a3e624391524ba33e17497f933979412290 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 7 Mar 2024 15:37:21 -0600 Subject: [PATCH 08/18] Allow caching for small objects * Turn cache off only for large and un-sliced requests. * Fix a crash when not conditionally slicing. * Cleanup some function names --- plugins/slice/Config.cc | 2 +- plugins/slice/slice.cc | 39 +++++++++++++++++++++------------------ 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/plugins/slice/Config.cc b/plugins/slice/Config.cc index 8cedc56eb85..055f1a6a2bf 100644 --- a/plugins/slice/Config.cc +++ b/plugins/slice/Config.cc @@ -355,7 +355,7 @@ void Config::updateStats(const std::function &update_func) { // TODO: check if this update is too frequent - if (stats_enabled) { + if (stats_enabled && m_oscache) { auto [cache_read_hits, cache_read_misses, cache_write_hits, cache_write_misses] = m_oscache->cache_stats(); update_func(stat_read_hits_id, cache_read_hits); update_func(stat_read_misses_id, cache_read_misses); diff --git a/plugins/slice/slice.cc b/plugins/slice/slice.cc index 4fad1c62095..14b8ba29a5b 100644 --- a/plugins/slice/slice.cc +++ b/plugins/slice/slice.cc @@ -30,17 +30,16 @@ #include #include #include -#include namespace { struct PluginInfo { Config config; - TSCont read_obj_size_contp; + TSCont read_resp_hdr_contp; }; Config globalConfig; -TSCont global_read_obj_size_contp; +TSCont global_read_resp_hdr_contp; static bool should_skip_this_obj(TSHttpTxn txnp, Config *const config) @@ -59,7 +58,7 @@ should_skip_this_obj(TSHttpTxn txnp, Config *const config) } bool -read_request(TSHttpTxn txnp, Config *const config, TSCont read_obj_size_contp) +read_request(TSHttpTxn txnp, Config *const config, TSCont read_resp_hdr_contp) { DEBUG_LOG("slice read_request"); TxnHdrMgr hdrmgr; @@ -91,11 +90,6 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_obj_size_contp) } } - // turn off any and all transaction caching (shouldn't matter) - TSHttpTxnCntlSet(txnp, TS_HTTP_CNTL_SERVER_NO_STORE, true); - TSHttpTxnCntlSet(txnp, TS_HTTP_CNTL_RESPONSE_CACHEABLE, false); - TSHttpTxnCntlSet(txnp, TS_HTTP_CNTL_REQUEST_CACHEABLE, false); - DEBUG_LOG("slice accepting and slicing"); // connection back into ATS sockaddr const *const ip = TSHttpTxnClientAddrGet(txnp); @@ -127,7 +121,7 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_obj_size_contp) // check if object is previously known to be too small to slice if (should_skip_this_obj(txnp, config)) { - TSHttpTxnHookAdd(txnp, TS_HTTP_READ_RESPONSE_HDR_HOOK, read_obj_size_contp); + TSHttpTxnHookAdd(txnp, TS_HTTP_READ_RESPONSE_HDR_HOOK, read_resp_hdr_contp); return false; } @@ -219,11 +213,16 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_obj_size_contp) } static int -read_obj_size(TSCont contp, TSEvent event, void *edata) +read_resp_hdr(TSCont contp, TSEvent event, void *edata) { TSHttpTxn txnp = static_cast(edata); PluginInfo *info = static_cast(TSContDataGet(contp)); + // This function does the following things: + // 1. Parse the object size from Content-Length + // 2. Cache the object size + // 3. If the object will be sliced in subsequent requests, turn off the cache to avoid taking up space, and head-of-line blocking. + int urllen = 0; char *urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &urllen); if (urlstr != nullptr) { @@ -239,6 +238,10 @@ read_obj_size(TSCont contp, TSEvent event, void *edata) [[maybe_unused]] auto [ptr, ec] = std::from_chars(constr, constr + conlen, content_length); if (ec == std::errc()) { info->config.sizeCacheAdd({urlstr, static_cast(urllen)}, content_length); + if (content_length >= info->config.m_min_size_to_slice) { + // This object will be sliced in future requests. Don't cache it for now. + TSHttpTxnServerRespNoStoreSet(txnp, 1); + } } else { ERROR_LOG("Could not parse content-length: %.*s", conlen, constr); } @@ -263,7 +266,7 @@ global_read_request_hook(TSCont // contp void *edata) { TSHttpTxn const txnp = static_cast(edata); - read_request(txnp, &globalConfig, global_read_obj_size_contp); + read_request(txnp, &globalConfig, global_read_resp_hdr_contp); TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE); return 0; } @@ -282,7 +285,7 @@ TSRemapDoRemap(void *ih, TSHttpTxn txnp, TSRemapRequestInfo * /* rri ATS_UNUSED { PluginInfo *const info = static_cast(ih); - if (read_request(txnp, &info->config, info->read_obj_size_contp)) { + if (read_request(txnp, &info->config, info->read_resp_hdr_contp)) { return TSREMAP_DID_REMAP_STOP; } else { return TSREMAP_NO_REMAP; @@ -334,9 +337,9 @@ TSRemapNewInstance(int argc, char *argv[], void **ih, char * /* errbuf */, int / info->config.fromArgs(argc - 2, argv + 2); - TSCont read_obj_size_contp = TSContCreate(read_obj_size, nullptr); - TSContDataSet(read_obj_size_contp, static_cast(info)); - info->read_obj_size_contp = read_obj_size_contp; + TSCont read_resp_hdr_contp = TSContCreate(read_resp_hdr, nullptr); + TSContDataSet(read_resp_hdr_contp, static_cast(info)); + info->read_resp_hdr_contp = read_resp_hdr_contp; init_stats(info->config); @@ -350,7 +353,7 @@ TSRemapDeleteInstance(void *ih) { if (nullptr != ih) { PluginInfo *const info = static_cast(ih); - TSContDestroy(info->read_obj_size_contp); + TSContDestroy(info->read_resp_hdr_contp); delete info; } } @@ -380,7 +383,7 @@ TSPluginInit(int argc, char const *argv[]) globalConfig.fromArgs(argc - 1, argv + 1); TSCont const global_read_request_contp(TSContCreate(global_read_request_hook, nullptr)); - global_read_obj_size_contp = TSContCreate(read_obj_size, nullptr); + global_read_resp_hdr_contp = TSContCreate(read_resp_hdr, nullptr); // Called immediately after the request header is read from the client TSHttpHookAdd(TS_HTTP_READ_REQUEST_HDR_HOOK, global_read_request_contp); From 68af857c9b1e4e0ccc5b995a656ad586473c58c7 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 13 Mar 2024 17:05:48 -0500 Subject: [PATCH 09/18] Reduce Cache Size, Change Stats * Only store large URLs in metadata cache. * The old stats don't make sense in this new cache. Change metadata stats. --- plugins/slice/Config.cc | 31 +++++++++++------------ plugins/slice/Config.h | 7 ++++-- plugins/slice/cache.cc | 34 ++++++++++++++++---------- plugins/slice/cache.h | 22 ++++++++++------- plugins/slice/server.cc | 9 ++++++- plugins/slice/slice.cc | 33 ++++++++++++++----------- plugins/slice/unit-tests/test_cache.cc | 24 +++++++++++++++--- 7 files changed, 103 insertions(+), 57 deletions(-) diff --git a/plugins/slice/Config.cc b/plugins/slice/Config.cc index 055f1a6a2bf..93b3c9095e9 100644 --- a/plugins/slice/Config.cc +++ b/plugins/slice/Config.cc @@ -126,13 +126,14 @@ Config::fromArgs(int const argc, char const *const argv[]) {const_cast("strip-range-for-head"), no_argument, nullptr, 'h'}, {const_cast("minimum-size"), required_argument, nullptr, 'm'}, {const_cast("metadata-cache-size"), required_argument, nullptr, 'z'}, + {const_cast("stats-prefix"), required_argument, nullptr, 'x'}, {nullptr, 0, nullptr, 0 }, }; // getopt assumes args start at '1' so this hack is needed char *const *argvp = (const_cast(argv) - 1); for (;;) { - int const opt = getopt_long(argc + 1, argvp, "b:dc:e:i:lm:p:r:s:t:z:", longopts, nullptr); + int const opt = getopt_long(argc + 1, argvp, "b:dc:e:i:lm:p:r:s:t:x:z:", longopts, nullptr); if (-1 == opt) { break; } @@ -250,6 +251,10 @@ Config::fromArgs(int const argc, char const *const argv[]) ERROR_LOG("Metadata cache size out of range: %s", optarg); } } break; + case 'x': { + stat_prefix = optarg; + DEBUG_LOG("Stat prefix: %s", stat_prefix.c_str()); + } break; default: break; } @@ -280,7 +285,7 @@ Config::fromArgs(int const argc, char const *const argv[]) if (m_min_size_to_slice > 0) { if (m_oscache.has_value()) { - DEBUG_LOG("Metadata cache size: %zu", m_oscache->cache_size()); + DEBUG_LOG("Metadata cache size: %zu", m_oscache->cache_capacity()); } else { ERROR_LOG("--metadata-cache-size is required when --minimum-size is specified! Using a default size of 16384."); setCacheSize(16384); @@ -351,19 +356,6 @@ Config::setCacheSize(size_t entries) } } -void -Config::updateStats(const std::function &update_func) -{ - // TODO: check if this update is too frequent - if (stats_enabled && m_oscache) { - auto [cache_read_hits, cache_read_misses, cache_write_hits, cache_write_misses] = m_oscache->cache_stats(); - update_func(stat_read_hits_id, cache_read_hits); - update_func(stat_read_misses_id, cache_read_misses); - update_func(stat_write_hits_id, cache_write_hits); - update_func(stat_write_misses_id, cache_write_misses); - } -} - bool Config::isKnownLargeObj(std::string_view url) { @@ -392,3 +384,12 @@ Config::sizeCacheAdd(std::string_view url, uint64_t size) m_oscache->set(url, size); } } + +void +Config::sizeCacheRemove(std::string_view url) +{ + if (m_oscache) { + DEBUG_LOG("Removing url from cache: %.*s", static_cast(url.size()), url.data()); + m_oscache->remove(url); + } +} diff --git a/plugins/slice/Config.h b/plugins/slice/Config.h index ca9de710595..b4007cb618b 100644 --- a/plugins/slice/Config.h +++ b/plugins/slice/Config.h @@ -78,13 +78,16 @@ struct Config { // Add an object size to cache void sizeCacheAdd(std::string_view url, uint64_t size); + // Remove an object size + void sizeCacheRemove(std::string_view url); + // Did we cache this internally as a small object? bool isKnownLargeObj(std::string_view url); // Metadata cache stats - int stat_read_hits_id{-1}, stat_read_misses_id{-1}, stat_write_hits_id{-1}, stat_write_misses_id{-1}; + std::string stat_prefix{}; + int stat_TP{0}, stat_TN{0}, stat_FP{0}, stat_FN{0}; bool stats_enabled{false}; - void updateStats(const std::function &update_func); private: TSHRTime m_nextlogtime{0}; // next time to log in ns diff --git a/plugins/slice/cache.cc b/plugins/slice/cache.cc index c82ac932505..28d5027de6c 100644 --- a/plugins/slice/cache.cc +++ b/plugins/slice/cache.cc @@ -25,7 +25,7 @@ #include ObjectSizeCache::ObjectSizeCache(cache_size_type cache_size) - : _cache_size(cache_size), _urls(cache_size), _object_sizes(cache_size, 0), _visits(cache_size, false) + : _cache_capacity(cache_size), _urls(cache_size), _object_sizes(cache_size, 0), _visits(cache_size, false) { } @@ -35,14 +35,12 @@ ObjectSizeCache::get(const std::string_view url) std::lock_guard lock{_mutex}; if (auto it = _index.find(url); it != _index.end()) { // Cache hit - _cache_hits++; cache_size_type i = it->second; _visits[i] = true; assert(url == _urls[i]); return _object_sizes[i]; } else { // Cache miss - _cache_misses++; return std::nullopt; } } @@ -54,23 +52,33 @@ ObjectSizeCache::set(const std::string_view url, uint64_t object_size) cache_size_type i; if (auto it = _index.find(url); it != _index.end()) { // Already exists in cache. Overwrite. - _cache_write_hits++; i = it->second; } else { // Doesn't exist in cache. Evict something else. - _cache_write_misses++; find_eviction_slot(); i = _hand; _urls[i] = url; _index[_urls[i]] = _hand; _hand++; - if (_hand >= _cache_size) { + if (_hand >= _cache_capacity) { _hand = 0; } } _object_sizes[i] = object_size; } +void +ObjectSizeCache::remove(const std::string_view url) +{ + std::lock_guard lock{_mutex}; + if (auto it = _index.find(url); it != _index.end()) { + cache_size_type i = it->second; + _visits[i] = false; + _urls[i].erase(); + _index.erase(it); + } +} + /** * @brief Make _hand point to the next entry that should be replaced, and clear that entry if it exists. * @@ -81,7 +89,7 @@ ObjectSizeCache::find_eviction_slot() while (_visits[_hand]) { _visits[_hand] = false; _hand++; - if (_hand >= _cache_size) { + if (_hand >= _cache_capacity) { _hand = 0; } } @@ -94,15 +102,15 @@ ObjectSizeCache::find_eviction_slot() _urls[_hand].erase(); } } -std::tuple -ObjectSizeCache::cache_stats() + +ObjectSizeCache::cache_size_type +ObjectSizeCache::cache_capacity() { - std::lock_guard lock{_mutex}; - return {_cache_hits, _cache_misses, _cache_write_hits, _cache_write_misses}; + return _cache_capacity; } ObjectSizeCache::cache_size_type -ObjectSizeCache::cache_size() +ObjectSizeCache::cache_count() { - return _cache_size; + return _index.size(); } diff --git a/plugins/slice/cache.h b/plugins/slice/cache.h index fcd53a36e41..76abcdb81c4 100644 --- a/plugins/slice/cache.h +++ b/plugins/slice/cache.h @@ -34,7 +34,7 @@ class ObjectSizeCache using cache_size_type = size_t; using object_size_type = uint64_t; - ObjectSizeCache(cache_size_type cache_size); + ObjectSizeCache(cache_size_type capacity); /** * @brief Get an object size from cache. @@ -52,23 +52,27 @@ class ObjectSizeCache */ void set(const std::string_view url, object_size_type object_size); - cache_size_type cache_size(); + /** + * @brief Remove an object from cache + * + * @param url The URL of the object + */ + void remove(const std::string_view url); + + // Max capacity of the cache + cache_size_type cache_capacity(); - std::tuple cache_stats(); + // Current number of used entries in the cache + cache_size_type cache_count(); private: void find_eviction_slot(); - cache_size_type _cache_size; + cache_size_type _cache_capacity; cache_size_type _hand{0}; std::vector _urls; std::vector _object_sizes; std::vector _visits; std::unordered_map _index; std::mutex _mutex; - - uint64_t _cache_hits = 0; - uint64_t _cache_misses = 0; - uint64_t _cache_write_hits = 0; - uint64_t _cache_write_misses = 0; }; diff --git a/plugins/slice/server.cc b/plugins/slice/server.cc index 5e86418c38f..12ffea6c0f5 100644 --- a/plugins/slice/server.cc +++ b/plugins/slice/server.cc @@ -99,7 +99,14 @@ update_object_size(TSHttpTxn txnp, int64_t size, Config &config) return; } - config.sizeCacheAdd({urlstr, static_cast(urllen)}, static_cast(size)); + if (static_cast(size) >= config.m_min_size_to_slice) { + config.sizeCacheAdd({urlstr, static_cast(urllen)}, static_cast(size)); + TSStatIntIncrement(config.stat_TP, 1); + } else { + config.sizeCacheRemove({urlstr, static_cast(urllen)}); + TSStatIntIncrement(config.stat_FP, 1); + } + TSfree(urlstr); } else { ERROR_LOG("Could not get URL from transaction."); diff --git a/plugins/slice/slice.cc b/plugins/slice/slice.cc index 14b8ba29a5b..04ddfdb00e2 100644 --- a/plugins/slice/slice.cc +++ b/plugins/slice/slice.cc @@ -47,8 +47,6 @@ should_skip_this_obj(TSHttpTxn txnp, Config *const config) int len = 0; char *const urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &len); - config->updateStats([](int stat_id, uint64_t stat_value) { TSStatIntSet(stat_id, static_cast(stat_value)); }); - if (!config->isKnownLargeObj({urlstr, static_cast(len)})) { DEBUG_LOG("Not a known large object, not slicing: %.*s", len, urlstr); return true; @@ -119,9 +117,9 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_resp_hdr_contp) return false; } - // check if object is previously known to be too small to slice + // check if object is large enough to slice - skip small and unknown size objects if (should_skip_this_obj(txnp, config)) { - TSHttpTxnHookAdd(txnp, TS_HTTP_READ_RESPONSE_HDR_HOOK, read_resp_hdr_contp); + TSHttpTxnHookAdd(txnp, TS_HTTP_SEND_RESPONSE_HDR_HOOK, read_resp_hdr_contp); return false; } @@ -227,7 +225,7 @@ read_resp_hdr(TSCont contp, TSEvent event, void *edata) char *urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &urllen); if (urlstr != nullptr) { TxnHdrMgr response; - response.populateFrom(txnp, TSHttpTxnServerRespGet); + response.populateFrom(txnp, TSHttpTxnClientRespGet); HttpHeader const resp_header(response.m_buffer, response.m_lochdr); char constr[1024]; int conlen = sizeof constr; @@ -237,10 +235,14 @@ read_resp_hdr(TSCont contp, TSEvent event, void *edata) [[maybe_unused]] auto [ptr, ec] = std::from_chars(constr, constr + conlen, content_length); if (ec == std::errc()) { - info->config.sizeCacheAdd({urlstr, static_cast(urllen)}, content_length); if (content_length >= info->config.m_min_size_to_slice) { + // Remember that this object is big + info->config.sizeCacheAdd({urlstr, static_cast(urllen)}, content_length); // This object will be sliced in future requests. Don't cache it for now. TSHttpTxnServerRespNoStoreSet(txnp, 1); + TSStatIntIncrement(info->config.stat_FN, 1); + } else { + TSStatIntIncrement(info->config.stat_TN, 1); } } else { ERROR_LOG("Could not parse content-length: %.*s", conlen, constr); @@ -315,18 +317,19 @@ register_stat(const char *name, int &id) } static void -init_stats(Config &config) +init_stats(Config &config, const std::string &prefix) { const std::array, 4> stats{ - {{PLUGIN_NAME ".metadata_cache.read.hits", config.stat_read_hits_id}, - {PLUGIN_NAME ".metadata_cache.read.misses", config.stat_read_misses_id}, - {PLUGIN_NAME ".metadata_cache.write.hits", config.stat_write_hits_id}, - {PLUGIN_NAME ".metadata_cache.write.misses", config.stat_write_misses_id}} + {{".metadata_cache.true_large_objects", config.stat_TP}, + {".metadata_cache.true_small_objects", config.stat_TN}, + {".metadata_cache.false_large_objects", config.stat_FP}, + {".metadata_cache.false_small_objects", config.stat_FN}} }; config.stats_enabled = true; for (const auto &stat : stats) { - config.stats_enabled &= register_stat(stat.first, stat.second); + const std::string name = std::string{PLUGIN_NAME "."} + prefix + stat.first; + config.stats_enabled &= register_stat(name.c_str(), stat.second); } } @@ -341,7 +344,9 @@ TSRemapNewInstance(int argc, char *argv[], void **ih, char * /* errbuf */, int / TSContDataSet(read_resp_hdr_contp, static_cast(info)); info->read_resp_hdr_contp = read_resp_hdr_contp; - init_stats(info->config); + if (!info->config.stat_prefix.empty()) { + init_stats(info->config, info->config.stat_prefix); + } *ih = static_cast(info); @@ -389,5 +394,5 @@ TSPluginInit(int argc, char const *argv[]) TSHttpHookAdd(TS_HTTP_READ_REQUEST_HDR_HOOK, global_read_request_contp); // Register stats for metadata cache - init_stats(globalConfig); + init_stats(globalConfig, "global"); } diff --git a/plugins/slice/unit-tests/test_cache.cc b/plugins/slice/unit-tests/test_cache.cc index e6bc8f892a4..f4decc3c8b8 100644 --- a/plugins/slice/unit-tests/test_cache.cc +++ b/plugins/slice/unit-tests/test_cache.cc @@ -45,6 +45,19 @@ TEST_CASE("cache hit", "[slice][metadatacache]") CHECK(res2.value() == 123); } +TEST_CASE("cache remove", "[slice][metadatacache]") +{ + ObjectSizeCache cache{1024}; + cache.set("example.com/123"sv, 123); + std::optional res2 = cache.get("example.com/123"sv); + CHECK(res2.value() == 123); + cache.remove("example.com/123"sv); + std::optional res3 = cache.get("example.com/123"sv); + REQUIRE(!res3.has_value()); + REQUIRE(cache.cache_count() == 0); + REQUIRE(cache.cache_capacity() == 1024); +} + TEST_CASE("eviction", "[slice][metadatacache]") { constexpr int cache_size = 10; @@ -93,9 +106,9 @@ TEST_CASE("hit rate", "[slice][metadatacache]") { constexpr int cache_size = 10; ObjectSizeCache cache{cache_size}; - std::mt19937 gen; std::poisson_distribution d{cache_size}; + std::atomic hits{0}, misses{0}; for (uint64_t i = 0; i < cache_size * 100; i++) { std::stringstream ss; @@ -105,12 +118,13 @@ TEST_CASE("hit rate", "[slice][metadatacache]") std::optional size = cache.get(ss.str()); if (size.has_value()) { CHECK(size.value() == obj); + hits++; } else { cache.set(ss.str(), obj); + misses++; } } - auto [hits, misses, write_hits, write_misses] = cache.cache_stats(); INFO("Hits: " << hits); INFO("Misses: " << misses); REQUIRE(hits > cache_size * 50); @@ -124,6 +138,7 @@ TEST_CASE("threads", "[slice][metadatacache]") std::mt19937 gen; std::poisson_distribution d{cache_size}; std::vector threads; + std::atomic hits{0}, misses{0}; auto runfunc = [&]() { for (uint64_t i = 0; i < cache_size * 100; i++) { @@ -134,8 +149,10 @@ TEST_CASE("threads", "[slice][metadatacache]") std::optional size = cache.get(ss.str()); if (size.has_value()) { CHECK(size.value() == obj); + hits++; } else { cache.set(ss.str(), obj); + misses++; } } }; @@ -147,8 +164,9 @@ TEST_CASE("threads", "[slice][metadatacache]") for (auto &t : threads) { t.join(); } - auto [hits, misses, write_hits, write_misses] = cache.cache_stats(); INFO("Hits: " << hits); INFO("Misses: " << misses); REQUIRE(hits > cache_size * 50 * 4); + REQUIRE(cache.cache_count() == cache_size); + REQUIRE(cache.cache_capacity() == cache_size); } From bfa4b5c7ef4196711b84ca3b1248d52ab65f6212 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 19 Mar 2024 17:06:35 -0500 Subject: [PATCH 10/18] Fix hook, add err stats, add autest --- plugins/slice/Config.h | 2 +- plugins/slice/slice.cc | 20 ++- .../slice/slice_conditional.test.py | 149 ++++++++++++++++++ 3 files changed, 165 insertions(+), 6 deletions(-) create mode 100644 tests/gold_tests/pluginTest/slice/slice_conditional.test.py diff --git a/plugins/slice/Config.h b/plugins/slice/Config.h index b4007cb618b..d01df49929f 100644 --- a/plugins/slice/Config.h +++ b/plugins/slice/Config.h @@ -86,7 +86,7 @@ struct Config { // Metadata cache stats std::string stat_prefix{}; - int stat_TP{0}, stat_TN{0}, stat_FP{0}, stat_FN{0}; + int stat_TP{0}, stat_TN{0}, stat_FP{0}, stat_FN{0}, stat_no_cl{0}, stat_bad_cl{0}, stat_no_url{0}; bool stats_enabled{false}; private: diff --git a/plugins/slice/slice.cc b/plugins/slice/slice.cc index 04ddfdb00e2..a17992e8d6a 100644 --- a/plugins/slice/slice.cc +++ b/plugins/slice/slice.cc @@ -119,7 +119,8 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_resp_hdr_contp) // check if object is large enough to slice - skip small and unknown size objects if (should_skip_this_obj(txnp, config)) { - TSHttpTxnHookAdd(txnp, TS_HTTP_SEND_RESPONSE_HDR_HOOK, read_resp_hdr_contp); + TSHttpTxnHookAdd(txnp, TS_HTTP_READ_RESPONSE_HDR_HOOK, read_resp_hdr_contp); + TSHttpTxnHookAdd(txnp, TS_HTTP_CACHE_LOOKUP_COMPLETE_HOOK, read_resp_hdr_contp); return false; } @@ -225,7 +226,8 @@ read_resp_hdr(TSCont contp, TSEvent event, void *edata) char *urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &urllen); if (urlstr != nullptr) { TxnHdrMgr response; - response.populateFrom(txnp, TSHttpTxnClientRespGet); + TxnHdrMgr::HeaderGetFunc func = event == TS_EVENT_HTTP_CACHE_LOOKUP_COMPLETE ? TSHttpTxnCachedRespGet : TSHttpTxnServerRespGet; + response.populateFrom(txnp, func); HttpHeader const resp_header(response.m_buffer, response.m_lochdr); char constr[1024]; int conlen = sizeof constr; @@ -246,13 +248,16 @@ read_resp_hdr(TSCont contp, TSEvent event, void *edata) } } else { ERROR_LOG("Could not parse content-length: %.*s", conlen, constr); + TSStatIntIncrement(info->config.stat_bad_cl, 1); } } else { DEBUG_LOG("Could not get a content length for updating object size"); + TSStatIntIncrement(info->config.stat_no_cl, 1); } TSfree(urlstr); } else { ERROR_LOG("Could not get URL for obj size."); + TSStatIntIncrement(info->config.stat_no_url, 1); } // Reenable and continue with the state machine. @@ -319,11 +324,16 @@ register_stat(const char *name, int &id) static void init_stats(Config &config, const std::string &prefix) { - const std::array, 4> stats{ - {{".metadata_cache.true_large_objects", config.stat_TP}, + const std::array, 7> stats{ + { + {".metadata_cache.true_large_objects", config.stat_TP}, {".metadata_cache.true_small_objects", config.stat_TN}, {".metadata_cache.false_large_objects", config.stat_FP}, - {".metadata_cache.false_small_objects", config.stat_FN}} + {".metadata_cache.false_small_objects", config.stat_FN}, + {".metadata_cache.no_content_length", config.stat_no_cl}, + {".metadata_cache.bad_content_length", config.stat_bad_cl}, + {".metadata_cache.no_url", config.stat_no_url}, + } }; config.stats_enabled = true; diff --git a/tests/gold_tests/pluginTest/slice/slice_conditional.test.py b/tests/gold_tests/pluginTest/slice/slice_conditional.test.py new file mode 100644 index 00000000000..3e390797988 --- /dev/null +++ b/tests/gold_tests/pluginTest/slice/slice_conditional.test.py @@ -0,0 +1,149 @@ +''' +''' +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Test.Summary = ''' +Conditional Slicing test +''' + +# Test description: +# Preload the cache with the entire asset to be range requested. +# Reload remap rule with slice plugin +# Request content through the slice plugin + +Test.SkipUnless( + Condition.PluginExists('slice.so'), + Condition.PluginExists('cache_range_requests.so'), + Condition.PluginExists('xdebug.so'), +) +Test.ContinueOnFail = False + +# configure origin server +server = Test.MakeOriginServer("server", lookup_key="{PATH}{%Range}", options={'-v': None}) + +# Define ATS and configure +ts = Test.MakeATSProcess("ts") + +# small object, should not be sliced +req_small = { + "headers": "GET /small HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "\r\n", + "body": "", +} +res_small = { + "headers": "HTTP/1.1 200 OK\r\n" + "Connection: close\r\n" + "Cache-Control: max-age=10,public\r\n" + "\r\n", + "body": "smol", +} +server.addResponse("sessionlog.json", req_small, res_small) + +large_body = "large object!" +# large object, all in one slice +req_large = { + "headers": "GET /large HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "\r\n", + "body": "", +} +res_large = { + "headers": "HTTP/1.1 200 OK\r\n" + "Connection: close\r\n" + "Cache-Control: max-age=10,public\r\n" + "\r\n", + "body": large_body +} +server.addResponse("sessionlog.json", req_large, res_large) + +# large object, this populates the individual slices in the server + +body_len = len(large_body) +slice_begin = 0 +slice_end = 0 +slice_block_size = 10 +while (slice_end < body_len): + # 1st slice + slice_end = slice_begin + slice_block_size + req_large_slice = { + "headers": "GET /large HTTP/1.1\r\n" + "Host: www.example.com\r\n" + f"Range: bytes={slice_begin}-{slice_end - 1}" + "\r\n", + "body": "", + } + if slice_end > body_len: + slice_end = body_len + res_large_slice = { + "headers": + "HTTP/1.1 206 Partial Content\r\n" + "Accept-Ranges: bytes\r\n" + + f"Content-Range: bytes {slice_begin}-{slice_end - 1}/{body_len}\r\n" + "Cache-Control: max-age=10,public\r\n" + "\r\n", + "body": large_body[slice_begin:slice_end] + } + server.addResponse("sessionlog.json", req_large_slice, res_large_slice) + slice_begin = slice_end + +# set up slice plugin with remap host into cache_range_requests +ts.Disk.remap_config.AddLines( + [ + f'map http://slice/ http://127.0.0.1:{server.Variables.Port}/' + + f' @plugin=slice.so @pparam=--blockbytes-test={slice_block_size} @pparam=--minimum-size=8 @pparam=--metadata-cache-size=4 @plugin=cache_range_requests.so' + ]) + +ts.Disk.plugin_config.AddLine('xdebug.so --enable=x-cache') +ts.Disk.records_config.update( + { + 'proxy.config.diags.debug.enabled': '1', + 'proxy.config.diags.debug.tags': 'http|cache|slice|xdebug', + }) + +curl_and_args = 'curl -s -D /dev/stdout -o /dev/stderr -x localhost:{}'.format(ts.Variables.port) + ' -H "x-debug: x-cache"' + +# Test case 1: first request of small object +tr = Test.AddTestRun("Small request 1") +ps = tr.Processes.Default +ps.StartBefore(server, ready=When.PortOpen(server.Variables.Port)) +ps.StartBefore(Test.Processes.ts) +ps.Command = curl_and_args + ' http://slice/small' +ps.ReturnCode = 0 +ps.Streams.stderr.Content = Testers.ContainsExpression('smol', 'expected smol') +ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: miss', 'expected cache miss') +tr.StillRunningAfter = ts + +# Test case 2: second request of small object - expect cache hit +tr = Test.AddTestRun("Small request 2") +ps = tr.Processes.Default +ps.Command = curl_and_args + ' http://slice/small' +ps.ReturnCode = 0 +ps.Streams.stderr.Content = Testers.ContainsExpression('smol', 'expected smol') +ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: hit-fresh', 'expected cache hit-fresh') +tr.StillRunningAfter = ts + +# Test case 3: first request of large object - expect unsliced, cache write disabled +tr = Test.AddTestRun("Large request 1") +ps = tr.Processes.Default +ps.Command = curl_and_args + ' http://slice/large' +ps.ReturnCode = 0 +ps.Streams.stderr.Content = Testers.ContainsExpression('large object!', 'expected large object') +ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: miss', 'expected cache miss') +tr.StillRunningAfter = ts + +# Test case 4: first request of large object - expect sliced, cache miss +tr = Test.AddTestRun("Large request 2") +ps = tr.Processes.Default +ps.Command = curl_and_args + ' http://slice/large' +ps.ReturnCode = 0 +ps.Streams.stderr.Content = Testers.ContainsExpression('large object!', 'expected large object') +ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: miss', 'expected cache miss') +tr.StillRunningAfter = ts + +## Test case 4: first request of large object - expect cache hit +#tr = Test.AddTestRun("Large request 3") +#ps = tr.Processes.Default +#ps.Command = curl_and_args + ' http://slice/large' +#ps.ReturnCode = 0 +#ps.Streams.stderr.Content = Testers.ContainsExpression('large object!', 'expected large object') +#ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: hit-fresh', 'expected cache hit-fresh') +#tr.StillRunningAfter = ts From f0ba7348aad1e0cc8fe4a61801dd68f4740598f5 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 19 Mar 2024 17:49:26 -0500 Subject: [PATCH 11/18] autest - close connection to end test gracefully --- tests/gold_tests/pluginTest/slice/slice_conditional.test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gold_tests/pluginTest/slice/slice_conditional.test.py b/tests/gold_tests/pluginTest/slice/slice_conditional.test.py index 3e390797988..305f4285288 100644 --- a/tests/gold_tests/pluginTest/slice/slice_conditional.test.py +++ b/tests/gold_tests/pluginTest/slice/slice_conditional.test.py @@ -78,7 +78,7 @@ slice_end = body_len res_large_slice = { "headers": - "HTTP/1.1 206 Partial Content\r\n" + "Accept-Ranges: bytes\r\n" + + "HTTP/1.1 206 Partial Content\r\n" + "Connection: close\r\n" + "Accept-Ranges: bytes\r\n" + f"Content-Range: bytes {slice_begin}-{slice_end - 1}/{body_len}\r\n" + "Cache-Control: max-age=10,public\r\n" + "\r\n", "body": large_body[slice_begin:slice_end] } From 8768f1d14c3e344646913dc9dac9e3e498f60576 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 22 Mar 2024 17:20:18 -0500 Subject: [PATCH 12/18] Skip CRR on ranged request from client We're not normalizing for un-sliced requests, so don't use CRR. --- .../cache_range_requests.cc | 35 ++++++++++++--- plugins/slice/slice.cc | 13 ++++++ .../slice/slice_conditional.test.py | 45 +++++++++++-------- 3 files changed, 70 insertions(+), 23 deletions(-) diff --git a/plugins/cache_range_requests/cache_range_requests.cc b/plugins/cache_range_requests/cache_range_requests.cc index 4481b60ce4d..a0960333548 100644 --- a/plugins/cache_range_requests/cache_range_requests.cc +++ b/plugins/cache_range_requests/cache_range_requests.cc @@ -50,9 +50,11 @@ enum parent_select_mode_t { PS_CACHEKEY_URL, // Set parent selection url to cache_key url }; -constexpr std::string_view DefaultImsHeader = {"X-Crr-Ims"}; -constexpr std::string_view SLICE_CRR_HEADER = {"Slice-Crr-Status"}; -constexpr std::string_view SLICE_CRR_VAL = "1"; +using namespace std::string_view_literals; +constexpr std::string_view DefaultImsHeader = "X-Crr-Ims"sv; +constexpr std::string_view SLICE_CRR_HEADER = "Slice-Crr-Status"sv; +constexpr std::string_view SLICE_CRR_VAL = "1"sv; +constexpr std::string_view SKIP_CRR_HDR_NAME = "X-Skip-Crr"sv; struct pluginconfig { parent_select_mode_t ps_mode{PS_DEFAULT}; @@ -86,6 +88,7 @@ bool set_header(TSMBuffer, TSMLoc, const char *, int, const char int transaction_handler(TSCont, TSEvent, void *); struct pluginconfig *create_pluginconfig(int argc, char *const argv[]); void delete_pluginconfig(pluginconfig *const); +static bool has_skip_crr_header(TSHttpTxn); /** * Creates pluginconfig data structure @@ -192,12 +195,32 @@ handle_read_request_header(TSCont /* txn_contp ATS_UNUSED */, TSEvent /* event A { TSHttpTxn txnp = static_cast(edata); - range_header_check(txnp, gPluginConfig); + if (!has_skip_crr_header(txnp)) { + range_header_check(txnp, gPluginConfig); + } TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE); return 0; } +static bool +has_skip_crr_header(TSHttpTxn txnp) +{ + TSMBuffer hdr_buf = nullptr; + TSMLoc hdr_loc = TS_NULL_MLOC; + bool ret = false; + + if (TS_SUCCESS == TSHttpTxnClientReqGet(txnp, &hdr_buf, &hdr_loc)) { + TSMLoc const skip_crr_loc = TSMimeHdrFieldFind(hdr_buf, hdr_loc, SKIP_CRR_HDR_NAME.data(), SKIP_CRR_HDR_NAME.length()); + if (TS_NULL_MLOC != skip_crr_loc) { + TSHandleMLocRelease(hdr_buf, hdr_loc, skip_crr_loc); + ret = true; + } + TSHandleMLocRelease(hdr_buf, TS_NULL_MLOC, hdr_loc); + } + return ret; +} + /** * Reads the client request header and if this is a range request: * @@ -682,7 +705,9 @@ TSRemapDoRemap(void *ih, TSHttpTxn txnp, TSRemapRequestInfo * /* rri */) { pluginconfig *const pc = static_cast(ih); - range_header_check(txnp, pc); + if (!has_skip_crr_header(txnp)) { + range_header_check(txnp, pc); + } return TSREMAP_NO_REMAP; } diff --git a/plugins/slice/slice.cc b/plugins/slice/slice.cc index a17992e8d6a..c97688a33ec 100644 --- a/plugins/slice/slice.cc +++ b/plugins/slice/slice.cc @@ -30,9 +30,14 @@ #include #include #include +#include namespace { +using namespace std::string_view_literals; +constexpr std::string_view SKIP_CRR_HDR_NAME = "X-Skip-Crr"sv; +constexpr std::string_view SKIP_CRR_HDR_VALUE = "-"sv; + struct PluginInfo { Config config; TSCont read_resp_hdr_contp; @@ -121,6 +126,14 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_resp_hdr_contp) if (should_skip_this_obj(txnp, config)) { TSHttpTxnHookAdd(txnp, TS_HTTP_READ_RESPONSE_HDR_HOOK, read_resp_hdr_contp); TSHttpTxnHookAdd(txnp, TS_HTTP_CACHE_LOOKUP_COMPLETE_HOOK, read_resp_hdr_contp); + + // If the client sends a range request, and we don't slice, the range request goes to cache_range_requests, and can be + // cached as a range request. We don't want that, and instead want to skip CRR entirely. + if (header.hasKey(TS_MIME_FIELD_RANGE, TS_MIME_LEN_RANGE)) { + HttpHeader mutable_header(hdrmgr.m_buffer, hdrmgr.m_lochdr); + mutable_header.setKeyVal(SKIP_CRR_HDR_NAME.data(), SKIP_CRR_HDR_NAME.length(), SKIP_CRR_HDR_VALUE.data(), + SKIP_CRR_HDR_VALUE.length()); + } return false; } diff --git a/tests/gold_tests/pluginTest/slice/slice_conditional.test.py b/tests/gold_tests/pluginTest/slice/slice_conditional.test.py index 305f4285288..892e7864aeb 100644 --- a/tests/gold_tests/pluginTest/slice/slice_conditional.test.py +++ b/tests/gold_tests/pluginTest/slice/slice_conditional.test.py @@ -49,7 +49,6 @@ } server.addResponse("sessionlog.json", req_small, res_small) -large_body = "large object!" # large object, all in one slice req_large = { "headers": "GET /large HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "\r\n", @@ -57,12 +56,13 @@ } res_large = { "headers": "HTTP/1.1 200 OK\r\n" + "Connection: close\r\n" + "Cache-Control: max-age=10,public\r\n" + "\r\n", - "body": large_body + "body": "unsliced large object!" } server.addResponse("sessionlog.json", req_large, res_large) # large object, this populates the individual slices in the server +large_body = "large object sliced!" body_len = len(large_body) slice_begin = 0 slice_end = 0 @@ -95,13 +95,13 @@ ts.Disk.plugin_config.AddLine('xdebug.so --enable=x-cache') ts.Disk.records_config.update( { - 'proxy.config.diags.debug.enabled': '1', - 'proxy.config.diags.debug.tags': 'http|cache|slice|xdebug', + 'proxy.config.diags.debug.enabled': '0', + 'proxy.config.diags.debug.tags': 'http|cache|slice|xdebug|cache_range_requests', }) curl_and_args = 'curl -s -D /dev/stdout -o /dev/stderr -x localhost:{}'.format(ts.Variables.port) + ' -H "x-debug: x-cache"' -# Test case 1: first request of small object +# Test case: first request of small object tr = Test.AddTestRun("Small request 1") ps = tr.Processes.Default ps.StartBefore(server, ready=When.PortOpen(server.Variables.Port)) @@ -112,7 +112,7 @@ ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: miss', 'expected cache miss') tr.StillRunningAfter = ts -# Test case 2: second request of small object - expect cache hit +# Test case: second request of small object - expect cache hit tr = Test.AddTestRun("Small request 2") ps = tr.Processes.Default ps.Command = curl_and_args + ' http://slice/small' @@ -121,29 +121,38 @@ ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: hit-fresh', 'expected cache hit-fresh') tr.StillRunningAfter = ts -# Test case 3: first request of large object - expect unsliced, cache write disabled +# Test case: range request of small object - expect cache hit (proxy.config.http.cache.range.lookup = 1) +tr = Test.AddTestRun("Small request - ranged") +ps = tr.Processes.Default +ps.Command = curl_and_args + ' -r 1-2 http://slice/small' +ps.ReturnCode = 0 +ps.Streams.stderr.Content = Testers.ContainsExpression('mo', 'expected mo') +ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: hit-fresh', 'expected cache hit-fresh') +tr.StillRunningAfter = ts + +# Test case: first request of large object - expect unsliced, cache write disabled tr = Test.AddTestRun("Large request 1") ps = tr.Processes.Default ps.Command = curl_and_args + ' http://slice/large' ps.ReturnCode = 0 -ps.Streams.stderr.Content = Testers.ContainsExpression('large object!', 'expected large object') +ps.Streams.stderr.Content = Testers.ContainsExpression('unsliced large object!', 'expected large object') ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: miss', 'expected cache miss') tr.StillRunningAfter = ts -# Test case 4: first request of large object - expect sliced, cache miss +# Test case: first request of large object - expect sliced, cache miss tr = Test.AddTestRun("Large request 2") ps = tr.Processes.Default ps.Command = curl_and_args + ' http://slice/large' ps.ReturnCode = 0 -ps.Streams.stderr.Content = Testers.ContainsExpression('large object!', 'expected large object') +ps.Streams.stderr.Content = Testers.ContainsExpression('large object sliced!', 'expected large object') ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: miss', 'expected cache miss') tr.StillRunningAfter = ts -## Test case 4: first request of large object - expect cache hit -#tr = Test.AddTestRun("Large request 3") -#ps = tr.Processes.Default -#ps.Command = curl_and_args + ' http://slice/large' -#ps.ReturnCode = 0 -#ps.Streams.stderr.Content = Testers.ContainsExpression('large object!', 'expected large object') -#ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: hit-fresh', 'expected cache hit-fresh') -#tr.StillRunningAfter = ts +## Test case: first request of large object - expect cache hit +tr = Test.AddTestRun("Large request 3") +ps = tr.Processes.Default +ps.Command = curl_and_args + ' http://slice/large' +ps.ReturnCode = 0 +ps.Streams.stderr.Content = Testers.ContainsExpression('large object sliced!', 'expected large object') +ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: hit-fresh', 'expected cache hit-fresh') +tr.StillRunningAfter = ts From fd49d513d9e8f0e0fb5e2036eb4d6c7ad8fe8117 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 26 Jul 2024 13:50:07 -0500 Subject: [PATCH 13/18] Clang-format --- .../cache_range_requests.cc | 4 +-- plugins/slice/Config.h | 16 ++++++------ plugins/slice/cache.h | 12 ++++----- plugins/slice/server.cc | 2 +- plugins/slice/slice.cc | 14 +++++----- plugins/slice/unit-tests/test_cache.cc | 26 +++++++++---------- 6 files changed, 37 insertions(+), 37 deletions(-) diff --git a/plugins/cache_range_requests/cache_range_requests.cc b/plugins/cache_range_requests/cache_range_requests.cc index a0960333548..99db0c5411a 100644 --- a/plugins/cache_range_requests/cache_range_requests.cc +++ b/plugins/cache_range_requests/cache_range_requests.cc @@ -207,8 +207,8 @@ static bool has_skip_crr_header(TSHttpTxn txnp) { TSMBuffer hdr_buf = nullptr; - TSMLoc hdr_loc = TS_NULL_MLOC; - bool ret = false; + TSMLoc hdr_loc = TS_NULL_MLOC; + bool ret = false; if (TS_SUCCESS == TSHttpTxnClientReqGet(txnp, &hdr_buf, &hdr_loc)) { TSMLoc const skip_crr_loc = TSMimeHdrFieldFind(hdr_buf, hdr_loc, SKIP_CRR_HDR_NAME.data(), SKIP_CRR_HDR_NAME.length()); diff --git a/plugins/slice/Config.h b/plugins/slice/Config.h index d01df49929f..0256348fc50 100644 --- a/plugins/slice/Config.h +++ b/plugins/slice/Config.h @@ -46,9 +46,9 @@ struct Config { int m_paceerrsecs{0}; // -1 disable logging, 0 no pacing, max 60s int m_prefetchcount{0}; // 0 disables prefetching enum RefType { First, Relative }; - RefType m_reftype{First}; // reference slice is relative to request - bool m_head_strip_range{false}; // strip range header for head requests - uint64_t m_min_size_to_slice{0}; // Only strip objects larger than this + RefType m_reftype{First}; // reference slice is relative to request + bool m_head_strip_range{false}; // strip range header for head requests + uint64_t m_min_size_to_slice{0}; // Only strip objects larger than this std::string m_skip_header; std::string m_crr_ims_header; @@ -86,12 +86,12 @@ struct Config { // Metadata cache stats std::string stat_prefix{}; - int stat_TP{0}, stat_TN{0}, stat_FP{0}, stat_FN{0}, stat_no_cl{0}, stat_bad_cl{0}, stat_no_url{0}; - bool stats_enabled{false}; + int stat_TP{0}, stat_TN{0}, stat_FP{0}, stat_FN{0}, stat_no_cl{0}, stat_bad_cl{0}, stat_no_url{0}; + bool stats_enabled{false}; private: - TSHRTime m_nextlogtime{0}; // next time to log in ns - std::mutex m_mutex; + TSHRTime m_nextlogtime{0}; // next time to log in ns + std::mutex m_mutex; std::optional m_oscache; - void setCacheSize(size_t entries); + void setCacheSize(size_t entries); }; diff --git a/plugins/slice/cache.h b/plugins/slice/cache.h index 76abcdb81c4..af3f5d5ab14 100644 --- a/plugins/slice/cache.h +++ b/plugins/slice/cache.h @@ -68,11 +68,11 @@ class ObjectSizeCache private: void find_eviction_slot(); - cache_size_type _cache_capacity; - cache_size_type _hand{0}; - std::vector _urls; - std::vector _object_sizes; - std::vector _visits; + cache_size_type _cache_capacity; + cache_size_type _hand{0}; + std::vector _urls; + std::vector _object_sizes; + std::vector _visits; std::unordered_map _index; - std::mutex _mutex; + std::mutex _mutex; }; diff --git a/plugins/slice/server.cc b/plugins/slice/server.cc index 12ffea6c0f5..aeb1b0de8ce 100644 --- a/plugins/slice/server.cc +++ b/plugins/slice/server.cc @@ -91,7 +91,7 @@ enum HeaderState { static void update_object_size(TSHttpTxn txnp, int64_t size, Config &config) { - int urllen = 0; + int urllen = 0; char *urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &urllen); if (urlstr != nullptr) { if (size <= 0) { diff --git a/plugins/slice/slice.cc b/plugins/slice/slice.cc index c97688a33ec..515a0ed81dd 100644 --- a/plugins/slice/slice.cc +++ b/plugins/slice/slice.cc @@ -49,7 +49,7 @@ TSCont global_read_resp_hdr_contp; static bool should_skip_this_obj(TSHttpTxn txnp, Config *const config) { - int len = 0; + int len = 0; char *const urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &len); if (!config->isKnownLargeObj({urlstr, static_cast(len)})) { @@ -206,7 +206,7 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_resp_hdr_contp) // we'll intercept this GET and do it ourselves TSMutex const mutex = TSContMutexGet(reinterpret_cast(txnp)); - TSCont const icontp(TSContCreate(intercept_hook, mutex)); + TSCont const icontp(TSContCreate(intercept_hook, mutex)); TSContDataSet(icontp, data.release()); // Skip remap and remap rule requirement @@ -227,7 +227,7 @@ read_request(TSHttpTxn txnp, Config *const config, TSCont read_resp_hdr_contp) static int read_resp_hdr(TSCont contp, TSEvent event, void *edata) { - TSHttpTxn txnp = static_cast(edata); + TSHttpTxn txnp = static_cast(edata); PluginInfo *info = static_cast(TSContDataGet(contp)); // This function does the following things: @@ -235,15 +235,15 @@ read_resp_hdr(TSCont contp, TSEvent event, void *edata) // 2. Cache the object size // 3. If the object will be sliced in subsequent requests, turn off the cache to avoid taking up space, and head-of-line blocking. - int urllen = 0; + int urllen = 0; char *urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &urllen); if (urlstr != nullptr) { - TxnHdrMgr response; + TxnHdrMgr response; TxnHdrMgr::HeaderGetFunc func = event == TS_EVENT_HTTP_CACHE_LOOKUP_COMPLETE ? TSHttpTxnCachedRespGet : TSHttpTxnServerRespGet; response.populateFrom(txnp, func); HttpHeader const resp_header(response.m_buffer, response.m_lochdr); - char constr[1024]; - int conlen = sizeof constr; + char constr[1024]; + int conlen = sizeof constr; bool const hasContentLength(resp_header.valueForKey(TS_MIME_FIELD_CONTENT_LENGTH, TS_MIME_LEN_CONTENT_LENGTH, constr, &conlen)); if (hasContentLength) { uint64_t content_length; diff --git a/plugins/slice/unit-tests/test_cache.cc b/plugins/slice/unit-tests/test_cache.cc index f4decc3c8b8..fda55a3079b 100644 --- a/plugins/slice/unit-tests/test_cache.cc +++ b/plugins/slice/unit-tests/test_cache.cc @@ -33,7 +33,7 @@ using namespace std::string_view_literals; TEST_CASE("cache miss", "[slice][metadatacache]") { ObjectSizeCache cache{1024}; - std::optional res = cache.get("example.com"sv); + std::optional res = cache.get("example.com"sv); CHECK(res == std::nullopt); } @@ -60,7 +60,7 @@ TEST_CASE("cache remove", "[slice][metadatacache]") TEST_CASE("eviction", "[slice][metadatacache]") { - constexpr int cache_size = 10; + constexpr int cache_size = 10; ObjectSizeCache cache{cache_size}; for (uint64_t i = 0; i < cache_size * 100; i++) { std::stringstream ss; @@ -82,7 +82,7 @@ TEST_CASE("eviction", "[slice][metadatacache]") TEST_CASE("tiny cache", "[slice][metadatacache]") { - constexpr int cache_size = 1; + constexpr int cache_size = 1; ObjectSizeCache cache{cache_size}; for (uint64_t i = 0; i < cache_size * 100; i++) { std::stringstream ss; @@ -104,15 +104,15 @@ TEST_CASE("tiny cache", "[slice][metadatacache]") TEST_CASE("hit rate", "[slice][metadatacache]") { - constexpr int cache_size = 10; - ObjectSizeCache cache{cache_size}; - std::mt19937 gen; + constexpr int cache_size = 10; + ObjectSizeCache cache{cache_size}; + std::mt19937 gen; std::poisson_distribution d{cache_size}; - std::atomic hits{0}, misses{0}; + std::atomic hits{0}, misses{0}; for (uint64_t i = 0; i < cache_size * 100; i++) { std::stringstream ss; - uint64_t obj = d(gen); + uint64_t obj = d(gen); ss << "http://example.com/" << obj; std::optional size = cache.get(ss.str()); @@ -132,18 +132,18 @@ TEST_CASE("hit rate", "[slice][metadatacache]") TEST_CASE("threads", "[slice][metadatacache]") { - constexpr int cache_size = 10; + constexpr int cache_size = 10; ObjectSizeCache cache{cache_size}; - std::mt19937 gen; + std::mt19937 gen; std::poisson_distribution d{cache_size}; - std::vector threads; - std::atomic hits{0}, misses{0}; + std::vector threads; + std::atomic hits{0}, misses{0}; auto runfunc = [&]() { for (uint64_t i = 0; i < cache_size * 100; i++) { std::stringstream ss; - uint64_t obj = d(gen); + uint64_t obj = d(gen); ss << "http://example.com/" << obj; std::optional size = cache.get(ss.str()); From 317ecbd7fa0330518af761ab080e2c4a8a011853 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 26 Jul 2024 14:05:49 -0500 Subject: [PATCH 14/18] Clean up PR --- plugins/cache_range_requests/cache_range_requests.cc | 9 ++++----- plugins/slice/CMakeLists.txt | 2 +- plugins/slice/Config.h | 2 +- plugins/slice/{cache.cc => ObjectSizeCache.cc} | 2 +- plugins/slice/{cache.h => ObjectSizeCache.h} | 0 plugins/slice/unit-tests/CMakeLists.txt | 4 ++-- plugins/slice/unit-tests/test_cache.cc | 2 +- 7 files changed, 10 insertions(+), 11 deletions(-) rename plugins/slice/{cache.cc => ObjectSizeCache.cc} (99%) rename plugins/slice/{cache.h => ObjectSizeCache.h} (100%) diff --git a/plugins/cache_range_requests/cache_range_requests.cc b/plugins/cache_range_requests/cache_range_requests.cc index 99db0c5411a..37e81753f5a 100644 --- a/plugins/cache_range_requests/cache_range_requests.cc +++ b/plugins/cache_range_requests/cache_range_requests.cc @@ -50,11 +50,10 @@ enum parent_select_mode_t { PS_CACHEKEY_URL, // Set parent selection url to cache_key url }; -using namespace std::string_view_literals; -constexpr std::string_view DefaultImsHeader = "X-Crr-Ims"sv; -constexpr std::string_view SLICE_CRR_HEADER = "Slice-Crr-Status"sv; -constexpr std::string_view SLICE_CRR_VAL = "1"sv; -constexpr std::string_view SKIP_CRR_HDR_NAME = "X-Skip-Crr"sv; +constexpr std::string_view DefaultImsHeader = {"X-Crr-Ims"}; +constexpr std::string_view SLICE_CRR_HEADER = {"Slice-Crr-Status"}; +constexpr std::string_view SLICE_CRR_VAL = "1"; +constexpr std::string_view SKIP_CRR_HDR_NAME = {"X-Skip-Crr"}; struct pluginconfig { parent_select_mode_t ps_mode{PS_DEFAULT}; diff --git a/plugins/slice/CMakeLists.txt b/plugins/slice/CMakeLists.txt index 3c79739c14a..e82bc46c5c8 100644 --- a/plugins/slice/CMakeLists.txt +++ b/plugins/slice/CMakeLists.txt @@ -31,7 +31,7 @@ add_atsplugin( slice.cc transfer.cc util.cc - cache.cc + ObjectSizeCache.cc ) target_link_libraries(slice PRIVATE PCRE::PCRE) diff --git a/plugins/slice/Config.h b/plugins/slice/Config.h index 0256348fc50..b7694a861ab 100644 --- a/plugins/slice/Config.h +++ b/plugins/slice/Config.h @@ -19,7 +19,7 @@ #pragma once #include "slice.h" -#include "cache.h" +#include "ObjectSizeCache.h" #ifdef HAVE_PCRE_PCRE_H #include diff --git a/plugins/slice/cache.cc b/plugins/slice/ObjectSizeCache.cc similarity index 99% rename from plugins/slice/cache.cc rename to plugins/slice/ObjectSizeCache.cc index 28d5027de6c..2b7cec84b65 100644 --- a/plugins/slice/cache.cc +++ b/plugins/slice/ObjectSizeCache.cc @@ -21,7 +21,7 @@ limitations under the License. */ -#include "cache.h" +#include "ObjectSizeCache.h" #include ObjectSizeCache::ObjectSizeCache(cache_size_type cache_size) diff --git a/plugins/slice/cache.h b/plugins/slice/ObjectSizeCache.h similarity index 100% rename from plugins/slice/cache.h rename to plugins/slice/ObjectSizeCache.h diff --git a/plugins/slice/unit-tests/CMakeLists.txt b/plugins/slice/unit-tests/CMakeLists.txt index 098372a3cce..b616af17a74 100644 --- a/plugins/slice/unit-tests/CMakeLists.txt +++ b/plugins/slice/unit-tests/CMakeLists.txt @@ -25,12 +25,12 @@ target_compile_definitions(test_range PRIVATE UNITTEST) target_link_libraries(test_range PRIVATE catch2::catch2 ts::tsutil) add_test(NAME test_range COMMAND test_range) -add_executable(test_config test_config.cc ${PROJECT_SOURCE_DIR}/Config.cc ${PROJECT_SOURCE_DIR}/cache.cc) +add_executable(test_config test_config.cc ${PROJECT_SOURCE_DIR}/Config.cc ${PROJECT_SOURCE_DIR}/ObjectSizeCache.cc) target_compile_definitions(test_config PRIVATE UNITTEST) target_link_libraries(test_config PRIVATE PCRE::PCRE catch2::catch2 ts::tsutil) add_test(NAME test_config COMMAND test_config) -add_executable(test_cache test_cache.cc ${PROJECT_SOURCE_DIR}/cache.cc) +add_executable(test_cache test_cache.cc ${PROJECT_SOURCE_DIR}/ObjectSizeCache.cc) target_compile_definitions(test_cache PRIVATE UNITTEST) target_link_libraries(test_cache PRIVATE catch2::catch2 ts::tsutil) add_test(NAME test_cache COMMAND test_cache) diff --git a/plugins/slice/unit-tests/test_cache.cc b/plugins/slice/unit-tests/test_cache.cc index fda55a3079b..bd4a2f48bb2 100644 --- a/plugins/slice/unit-tests/test_cache.cc +++ b/plugins/slice/unit-tests/test_cache.cc @@ -27,7 +27,7 @@ #include #define CATCH_CONFIG_MAIN /* include main function */ #include "catch.hpp" /* catch unit-test framework */ -#include "../cache.h" +#include "../ObjectSizeCache.h" using namespace std::string_view_literals; TEST_CASE("cache miss", "[slice][metadatacache]") From bf32837e646b9aa013ee088826dfa43e327f49c1 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 26 Jul 2024 15:30:16 -0500 Subject: [PATCH 15/18] Add doc for cond slice --- doc/admin-guide/plugins/slice.en.rst | 35 ++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/doc/admin-guide/plugins/slice.en.rst b/doc/admin-guide/plugins/slice.en.rst index 9fa2832d8a3..b0de812aa26 100644 --- a/doc/admin-guide/plugins/slice.en.rst +++ b/doc/admin-guide/plugins/slice.en.rst @@ -153,6 +153,41 @@ The slice plugin supports the following options:: Enable slice plugin to strip Range header for HEAD requests. -h for short + --minimum-size (optional) + --metadata-cache-size (optional) + --stats-prefix (optional) + In combination, these three options allow for conditional slice. + Specify the minimum size object to slice with --minimum-size. Allowed + values are the same as --blockbytes. Conditional slicing uses a cache + of object sizes to make the decision of whether to slice. The cache + will only store the URL of large objects as they are discovered in + origin responses. You should set the --metadata-cache-size to by + estimating the working set size of large objects. You can use + stats to determine whether --metadata-cache-size was set optimally. + Stat names are prefixed with the value of --stats-prefix. The names + are: + + .metadata_cache.true_large_objects - large object cache hits + .metadata_cache.true_small_objects - small object cache hits + .metadata_cache.false_large_objects - large object cache misses + .metadata_cache.false_small_objects - small object cache misses + .metadata_cache.no_content_length - number of responses without content length + .metadata_cache.bad_content_length - number of responses with invalid content length + .metadata_cache.no_url - number of responses where URL parsing failed + + If an object size is not found in the object size cache, the plugin + will not slice the object, and will turn off ATS cache on this request. + The object size will be cached in following requests, and slice will + proceed normally if the object meets the minimum size requirement. + + Range requests from the client for small objects are passed through the + plugin unchanged. If you use the `cache_range_requests` plugin, slice plugin + will communicate with `cache_range_requests` using an internal header + that causes `cache_range_requests` to be bypassed in such requests, and + allow ATS to handle those range requests internally. + + + Examples:: @plugin=slice.so @pparam=--blockbytes=1000000 @plugin=cache_range_requests.so From 7fe6eb0d4ea8170230da9fff947e6dc00c60382c Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 29 Jul 2024 14:08:25 -0500 Subject: [PATCH 16/18] Fix build issues --- plugins/slice/Config.cc | 4 ++-- plugins/slice/server.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/slice/Config.cc b/plugins/slice/Config.cc index 93b3c9095e9..9d62dd71f87 100644 --- a/plugins/slice/Config.cc +++ b/plugins/slice/Config.cc @@ -245,9 +245,9 @@ Config::fromArgs(int const argc, char const *const argv[]) size_t size = std::stoul(optarg); setCacheSize(size); DEBUG_LOG("Metadata cache size: %zu entries", size); - } catch (std::invalid_argument e) { + } catch (const std::invalid_argument &e) { ERROR_LOG("Invalid metadata cache size argument: %s", optarg); - } catch (std::out_of_range e) { + } catch (const std::out_of_range &e) { ERROR_LOG("Metadata cache size out of range: %s", optarg); } } break; diff --git a/plugins/slice/server.cc b/plugins/slice/server.cc index aeb1b0de8ce..bad9947994e 100644 --- a/plugins/slice/server.cc +++ b/plugins/slice/server.cc @@ -95,7 +95,7 @@ update_object_size(TSHttpTxn txnp, int64_t size, Config &config) char *urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &urllen); if (urlstr != nullptr) { if (size <= 0) { - DEBUG_LOG("Ignoring invalid content length for %.*s: %lld", urllen, urlstr, size); + DEBUG_LOG("Ignoring invalid content length for %.*s: %" PRId64, urllen, urlstr, size); return; } From d8299b5908026bf9a10d7568ff5866a5d1880e8a Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 29 Jul 2024 16:55:10 -0500 Subject: [PATCH 17/18] Add missing include --- plugins/slice/unit-tests/test_cache.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/slice/unit-tests/test_cache.cc b/plugins/slice/unit-tests/test_cache.cc index bd4a2f48bb2..2c744337519 100644 --- a/plugins/slice/unit-tests/test_cache.cc +++ b/plugins/slice/unit-tests/test_cache.cc @@ -25,6 +25,7 @@ #include #include #include +#include #define CATCH_CONFIG_MAIN /* include main function */ #include "catch.hpp" /* catch unit-test framework */ #include "../ObjectSizeCache.h" From a86487e09bccebf1086ca4026daabbd65e1469b4 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 9 Aug 2024 15:06:49 -0500 Subject: [PATCH 18/18] Code review changes * Add more docs about various cases * Update autest loop --- doc/admin-guide/plugins/slice.en.rst | 30 +++++++++++++++++++ .../slice/slice_conditional.test.py | 6 ++-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/doc/admin-guide/plugins/slice.en.rst b/doc/admin-guide/plugins/slice.en.rst index b0de812aa26..9215d4cc845 100644 --- a/doc/admin-guide/plugins/slice.en.rst +++ b/doc/admin-guide/plugins/slice.en.rst @@ -342,6 +342,36 @@ The functionality works with `--ref-relative` both enabled and disabled. If `--r disabled (using slice 0 as the reference block), requesting to PURGE a block that does not have slice 0 in its range will still PURGE the slice 0 block, as the reference block is always processed. +Conditional Slicing +------------------- + +The goal of conditional slicing is to slice large objects and avoid the cost of slicing on small +objects. If `--minimum-size` is specified, conditional slicing is enabled and works as follows. + +The plugin builds a object size cache in memory. The key is the URL of the object. Only +large object URLs are written to the cache. The object size cache uses CLOCK eviction algorithm +in order to have lazy promotion behavior. + +When a URL not found in the object size cache, the plugin treats the object as a small object. It +will not intercept the request. The request is processed by ATS without any slice logic. Upon +receiving a response, the slice plugin will check the response content length to update the object +size cache if necessary. + +When a large URL is requested for the first time, conditional slicing will not intercept that +request since the URL is not known to be large. This will cause an ATS cache miss and the request +will go to origin server. Slice plugin will turn off writing to cache for this response, because +it expects to slice this object in future requests. + +If the object size cache evicts a URL, the size of the object for that URL will need to be learned +again in a subsequent request, and the behavior above will happen again. + +If the URL is found in the object size cache, conditional slicing treats the object as a large object +and will activate the slicing logic as described in the rest of this document. + +If the client sends a range request, and that URL is not in the object size cache, the slice plugin +will forward the range request to ATS core. It also attaches an internal header in order to deactivate +the `cache_range_requests` plugin for this range request. + Important Notes =============== diff --git a/tests/gold_tests/pluginTest/slice/slice_conditional.test.py b/tests/gold_tests/pluginTest/slice/slice_conditional.test.py index 892e7864aeb..fe39152979d 100644 --- a/tests/gold_tests/pluginTest/slice/slice_conditional.test.py +++ b/tests/gold_tests/pluginTest/slice/slice_conditional.test.py @@ -65,10 +65,8 @@ large_body = "large object sliced!" body_len = len(large_body) slice_begin = 0 -slice_end = 0 slice_block_size = 10 -while (slice_end < body_len): - # 1st slice +while (slice_begin < body_len): slice_end = slice_begin + slice_block_size req_large_slice = { "headers": "GET /large HTTP/1.1\r\n" + "Host: www.example.com\r\n" + f"Range: bytes={slice_begin}-{slice_end - 1}" + "\r\n", @@ -83,7 +81,7 @@ "body": large_body[slice_begin:slice_end] } server.addResponse("sessionlog.json", req_large_slice, res_large_slice) - slice_begin = slice_end + slice_begin += slice_block_size # set up slice plugin with remap host into cache_range_requests ts.Disk.remap_config.AddLines(