diff --git a/doc/admin-guide/plugins/slice.en.rst b/doc/admin-guide/plugins/slice.en.rst index 9fa2832d8a3..9215d4cc845 100644 --- a/doc/admin-guide/plugins/slice.en.rst +++ b/doc/admin-guide/plugins/slice.en.rst @@ -153,6 +153,41 @@ The slice plugin supports the following options:: Enable slice plugin to strip Range header for HEAD requests. -h for short + --minimum-size (optional) + --metadata-cache-size (optional) + --stats-prefix (optional) + In combination, these three options allow for conditional slice. + Specify the minimum size object to slice with --minimum-size. Allowed + values are the same as --blockbytes. Conditional slicing uses a cache + of object sizes to make the decision of whether to slice. The cache + will only store the URL of large objects as they are discovered in + origin responses. You should set the --metadata-cache-size to by + estimating the working set size of large objects. You can use + stats to determine whether --metadata-cache-size was set optimally. + Stat names are prefixed with the value of --stats-prefix. The names + are: + + .metadata_cache.true_large_objects - large object cache hits + .metadata_cache.true_small_objects - small object cache hits + .metadata_cache.false_large_objects - large object cache misses + .metadata_cache.false_small_objects - small object cache misses + .metadata_cache.no_content_length - number of responses without content length + .metadata_cache.bad_content_length - number of responses with invalid content length + .metadata_cache.no_url - number of responses where URL parsing failed + + If an object size is not found in the object size cache, the plugin + will not slice the object, and will turn off ATS cache on this request. + The object size will be cached in following requests, and slice will + proceed normally if the object meets the minimum size requirement. + + Range requests from the client for small objects are passed through the + plugin unchanged. If you use the `cache_range_requests` plugin, slice plugin + will communicate with `cache_range_requests` using an internal header + that causes `cache_range_requests` to be bypassed in such requests, and + allow ATS to handle those range requests internally. + + + Examples:: @plugin=slice.so @pparam=--blockbytes=1000000 @plugin=cache_range_requests.so @@ -307,6 +342,36 @@ The functionality works with `--ref-relative` both enabled and disabled. If `--r disabled (using slice 0 as the reference block), requesting to PURGE a block that does not have slice 0 in its range will still PURGE the slice 0 block, as the reference block is always processed. +Conditional Slicing +------------------- + +The goal of conditional slicing is to slice large objects and avoid the cost of slicing on small +objects. If `--minimum-size` is specified, conditional slicing is enabled and works as follows. + +The plugin builds a object size cache in memory. The key is the URL of the object. Only +large object URLs are written to the cache. The object size cache uses CLOCK eviction algorithm +in order to have lazy promotion behavior. + +When a URL not found in the object size cache, the plugin treats the object as a small object. It +will not intercept the request. The request is processed by ATS without any slice logic. Upon +receiving a response, the slice plugin will check the response content length to update the object +size cache if necessary. + +When a large URL is requested for the first time, conditional slicing will not intercept that +request since the URL is not known to be large. This will cause an ATS cache miss and the request +will go to origin server. Slice plugin will turn off writing to cache for this response, because +it expects to slice this object in future requests. + +If the object size cache evicts a URL, the size of the object for that URL will need to be learned +again in a subsequent request, and the behavior above will happen again. + +If the URL is found in the object size cache, conditional slicing treats the object as a large object +and will activate the slicing logic as described in the rest of this document. + +If the client sends a range request, and that URL is not in the object size cache, the slice plugin +will forward the range request to ATS core. It also attaches an internal header in order to deactivate +the `cache_range_requests` plugin for this range request. + Important Notes =============== diff --git a/plugins/cache_range_requests/cache_range_requests.cc b/plugins/cache_range_requests/cache_range_requests.cc index 4481b60ce4d..37e81753f5a 100644 --- a/plugins/cache_range_requests/cache_range_requests.cc +++ b/plugins/cache_range_requests/cache_range_requests.cc @@ -50,9 +50,10 @@ enum parent_select_mode_t { PS_CACHEKEY_URL, // Set parent selection url to cache_key url }; -constexpr std::string_view DefaultImsHeader = {"X-Crr-Ims"}; -constexpr std::string_view SLICE_CRR_HEADER = {"Slice-Crr-Status"}; -constexpr std::string_view SLICE_CRR_VAL = "1"; +constexpr std::string_view DefaultImsHeader = {"X-Crr-Ims"}; +constexpr std::string_view SLICE_CRR_HEADER = {"Slice-Crr-Status"}; +constexpr std::string_view SLICE_CRR_VAL = "1"; +constexpr std::string_view SKIP_CRR_HDR_NAME = {"X-Skip-Crr"}; struct pluginconfig { parent_select_mode_t ps_mode{PS_DEFAULT}; @@ -86,6 +87,7 @@ bool set_header(TSMBuffer, TSMLoc, const char *, int, const char int transaction_handler(TSCont, TSEvent, void *); struct pluginconfig *create_pluginconfig(int argc, char *const argv[]); void delete_pluginconfig(pluginconfig *const); +static bool has_skip_crr_header(TSHttpTxn); /** * Creates pluginconfig data structure @@ -192,12 +194,32 @@ handle_read_request_header(TSCont /* txn_contp ATS_UNUSED */, TSEvent /* event A { TSHttpTxn txnp = static_cast(edata); - range_header_check(txnp, gPluginConfig); + if (!has_skip_crr_header(txnp)) { + range_header_check(txnp, gPluginConfig); + } TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE); return 0; } +static bool +has_skip_crr_header(TSHttpTxn txnp) +{ + TSMBuffer hdr_buf = nullptr; + TSMLoc hdr_loc = TS_NULL_MLOC; + bool ret = false; + + if (TS_SUCCESS == TSHttpTxnClientReqGet(txnp, &hdr_buf, &hdr_loc)) { + TSMLoc const skip_crr_loc = TSMimeHdrFieldFind(hdr_buf, hdr_loc, SKIP_CRR_HDR_NAME.data(), SKIP_CRR_HDR_NAME.length()); + if (TS_NULL_MLOC != skip_crr_loc) { + TSHandleMLocRelease(hdr_buf, hdr_loc, skip_crr_loc); + ret = true; + } + TSHandleMLocRelease(hdr_buf, TS_NULL_MLOC, hdr_loc); + } + return ret; +} + /** * Reads the client request header and if this is a range request: * @@ -682,7 +704,9 @@ TSRemapDoRemap(void *ih, TSHttpTxn txnp, TSRemapRequestInfo * /* rri */) { pluginconfig *const pc = static_cast(ih); - range_header_check(txnp, pc); + if (!has_skip_crr_header(txnp)) { + range_header_check(txnp, pc); + } return TSREMAP_NO_REMAP; } diff --git a/plugins/slice/CMakeLists.txt b/plugins/slice/CMakeLists.txt index f63bd2efe4e..e82bc46c5c8 100644 --- a/plugins/slice/CMakeLists.txt +++ b/plugins/slice/CMakeLists.txt @@ -31,6 +31,7 @@ add_atsplugin( slice.cc transfer.cc util.cc + ObjectSizeCache.cc ) target_link_libraries(slice PRIVATE PCRE::PCRE) diff --git a/plugins/slice/Config.cc b/plugins/slice/Config.cc index 3a7ef883f70..9d62dd71f87 100644 --- a/plugins/slice/Config.cc +++ b/plugins/slice/Config.cc @@ -18,6 +18,7 @@ #include "Config.h" +#include #include #include #include @@ -123,13 +124,16 @@ Config::fromArgs(int const argc, char const *const argv[]) {const_cast("blockbytes-test"), required_argument, nullptr, 't'}, {const_cast("prefetch-count"), required_argument, nullptr, 'f'}, {const_cast("strip-range-for-head"), no_argument, nullptr, 'h'}, + {const_cast("minimum-size"), required_argument, nullptr, 'm'}, + {const_cast("metadata-cache-size"), required_argument, nullptr, 'z'}, + {const_cast("stats-prefix"), required_argument, nullptr, 'x'}, {nullptr, 0, nullptr, 0 }, }; // getopt assumes args start at '1' so this hack is needed char *const *argvp = (const_cast(argv) - 1); for (;;) { - int const opt = getopt_long(argc + 1, argvp, "b:dc:e:i:lp:r:s:t:", longopts, nullptr); + int const opt = getopt_long(argc + 1, argvp, "b:dc:e:i:lm:p:r:s:t:x:z:", longopts, nullptr); if (-1 == opt) { break; } @@ -228,6 +232,29 @@ Config::fromArgs(int const argc, char const *const argv[]) case 'h': { m_head_strip_range = true; } break; + case 'm': { + int64_t const bytesread = bytesFrom(optarg); + if (bytesread < 0) { + DEBUG_LOG("Invalid minimum-size: %s", optarg); + } + m_min_size_to_slice = bytesread; + DEBUG_LOG("Only slicing objects %" PRIu64 " bytes or larger", m_min_size_to_slice); + } break; + case 'z': { + try { + size_t size = std::stoul(optarg); + setCacheSize(size); + DEBUG_LOG("Metadata cache size: %zu entries", size); + } catch (const std::invalid_argument &e) { + ERROR_LOG("Invalid metadata cache size argument: %s", optarg); + } catch (const std::out_of_range &e) { + ERROR_LOG("Metadata cache size out of range: %s", optarg); + } + } break; + case 'x': { + stat_prefix = optarg; + DEBUG_LOG("Stat prefix: %s", stat_prefix.c_str()); + } break; default: break; } @@ -256,6 +283,15 @@ Config::fromArgs(int const argc, char const *const argv[]) DEBUG_LOG("Using default slice skip header %s", m_skip_header.c_str()); } + if (m_min_size_to_slice > 0) { + if (m_oscache.has_value()) { + DEBUG_LOG("Metadata cache size: %zu", m_oscache->cache_capacity()); + } else { + ERROR_LOG("--metadata-cache-size is required when --minimum-size is specified! Using a default size of 16384."); + setCacheSize(16384); + } + } + return true; } @@ -309,3 +345,51 @@ Config::matchesRegex(char const *const url, int const urllen) const return matches; } + +void +Config::setCacheSize(size_t entries) +{ + if (entries == 0) { + m_oscache.reset(); + } else { + m_oscache.emplace(entries); + } +} + +bool +Config::isKnownLargeObj(std::string_view url) +{ + if (m_min_size_to_slice <= 0) { + // If conditional slicing is not set, all objects are large enough to slice + return true; + } + + assert(m_oscache.has_value()); // object size cache is always present when conditionally slicing + std::optional size = m_oscache->get(url); + if (size.has_value()) { + DEBUG_LOG("Found url in cache: %.*s -> %" PRIu64, static_cast(url.size()), url.data(), size.value()); + if (size.value() >= m_min_size_to_slice) { + return true; + } + } + + return false; +} + +void +Config::sizeCacheAdd(std::string_view url, uint64_t size) +{ + if (m_oscache) { + DEBUG_LOG("Adding url to cache: %.*s -> %" PRIu64, static_cast(url.size()), url.data(), size); + m_oscache->set(url, size); + } +} + +void +Config::sizeCacheRemove(std::string_view url) +{ + if (m_oscache) { + DEBUG_LOG("Removing url from cache: %.*s", static_cast(url.size()), url.data()); + m_oscache->remove(url); + } +} diff --git a/plugins/slice/Config.h b/plugins/slice/Config.h index c1e630447ad..b7694a861ab 100644 --- a/plugins/slice/Config.h +++ b/plugins/slice/Config.h @@ -19,6 +19,7 @@ #pragma once #include "slice.h" +#include "ObjectSizeCache.h" #ifdef HAVE_PCRE_PCRE_H #include @@ -45,8 +46,9 @@ struct Config { int m_paceerrsecs{0}; // -1 disable logging, 0 no pacing, max 60s int m_prefetchcount{0}; // 0 disables prefetching enum RefType { First, Relative }; - RefType m_reftype{First}; // reference slice is relative to request - bool m_head_strip_range{false}; // strip range header for head requests + RefType m_reftype{First}; // reference slice is relative to request + bool m_head_strip_range{false}; // strip range header for head requests + uint64_t m_min_size_to_slice{0}; // Only strip objects larger than this std::string m_skip_header; std::string m_crr_ims_header; @@ -73,7 +75,23 @@ struct Config { // If no null reg, true, otherwise check against regex bool matchesRegex(char const *const url, int const urllen) const; + // Add an object size to cache + void sizeCacheAdd(std::string_view url, uint64_t size); + + // Remove an object size + void sizeCacheRemove(std::string_view url); + + // Did we cache this internally as a small object? + bool isKnownLargeObj(std::string_view url); + + // Metadata cache stats + std::string stat_prefix{}; + int stat_TP{0}, stat_TN{0}, stat_FP{0}, stat_FN{0}, stat_no_cl{0}, stat_bad_cl{0}, stat_no_url{0}; + bool stats_enabled{false}; + private: - TSHRTime m_nextlogtime{0}; // next time to log in ns - std::mutex m_mutex; + TSHRTime m_nextlogtime{0}; // next time to log in ns + std::mutex m_mutex; + std::optional m_oscache; + void setCacheSize(size_t entries); }; diff --git a/plugins/slice/ObjectSizeCache.cc b/plugins/slice/ObjectSizeCache.cc new file mode 100644 index 00000000000..2b7cec84b65 --- /dev/null +++ b/plugins/slice/ObjectSizeCache.cc @@ -0,0 +1,116 @@ +/** @file cache.cc + + Metadata cache to store object sizes. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include "ObjectSizeCache.h" +#include + +ObjectSizeCache::ObjectSizeCache(cache_size_type cache_size) + : _cache_capacity(cache_size), _urls(cache_size), _object_sizes(cache_size, 0), _visits(cache_size, false) +{ +} + +std::optional +ObjectSizeCache::get(const std::string_view url) +{ + std::lock_guard lock{_mutex}; + if (auto it = _index.find(url); it != _index.end()) { + // Cache hit + cache_size_type i = it->second; + _visits[i] = true; + assert(url == _urls[i]); + return _object_sizes[i]; + } else { + // Cache miss + return std::nullopt; + } +} + +void +ObjectSizeCache::set(const std::string_view url, uint64_t object_size) +{ + std::lock_guard lock{_mutex}; + cache_size_type i; + if (auto it = _index.find(url); it != _index.end()) { + // Already exists in cache. Overwrite. + i = it->second; + } else { + // Doesn't exist in cache. Evict something else. + find_eviction_slot(); + i = _hand; + _urls[i] = url; + _index[_urls[i]] = _hand; + _hand++; + if (_hand >= _cache_capacity) { + _hand = 0; + } + } + _object_sizes[i] = object_size; +} + +void +ObjectSizeCache::remove(const std::string_view url) +{ + std::lock_guard lock{_mutex}; + if (auto it = _index.find(url); it != _index.end()) { + cache_size_type i = it->second; + _visits[i] = false; + _urls[i].erase(); + _index.erase(it); + } +} + +/** + * @brief Make _hand point to the next entry that should be replaced, and clear that entry if it exists. + * + */ +void +ObjectSizeCache::find_eviction_slot() +{ + while (_visits[_hand]) { + _visits[_hand] = false; + _hand++; + if (_hand >= _cache_capacity) { + _hand = 0; + } + } + + std::string_view evicted_url = _urls[_hand]; + if (!evicted_url.empty()) { + auto it = _index.find(evicted_url); + assert(it != _index.end()); + _index.erase(it); + _urls[_hand].erase(); + } +} + +ObjectSizeCache::cache_size_type +ObjectSizeCache::cache_capacity() +{ + return _cache_capacity; +} + +ObjectSizeCache::cache_size_type +ObjectSizeCache::cache_count() +{ + return _index.size(); +} diff --git a/plugins/slice/ObjectSizeCache.h b/plugins/slice/ObjectSizeCache.h new file mode 100644 index 00000000000..af3f5d5ab14 --- /dev/null +++ b/plugins/slice/ObjectSizeCache.h @@ -0,0 +1,78 @@ +/** @file cache.h + + Metadata cache to store object sizes. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +class ObjectSizeCache +{ +public: + using cache_size_type = size_t; + using object_size_type = uint64_t; + + ObjectSizeCache(cache_size_type capacity); + + /** + * @brief Get an object size from cache. + * + * @param url The URL of the object + * @return std::optional If the object size was found, return the size of the object. If not, return std::nullopt. + */ + std::optional get(const std::string_view url); + + /** + * @brief Add an object size to cache. + * + * @param url The URL of the object + * @param object_size The size of the object + */ + void set(const std::string_view url, object_size_type object_size); + + /** + * @brief Remove an object from cache + * + * @param url The URL of the object + */ + void remove(const std::string_view url); + + // Max capacity of the cache + cache_size_type cache_capacity(); + + // Current number of used entries in the cache + cache_size_type cache_count(); + +private: + void find_eviction_slot(); + + cache_size_type _cache_capacity; + cache_size_type _hand{0}; + std::vector _urls; + std::vector _object_sizes; + std::vector _visits; + std::unordered_map _index; + std::mutex _mutex; +}; diff --git a/plugins/slice/server.cc b/plugins/slice/server.cc index 2770d2a85d5..bad9947994e 100644 --- a/plugins/slice/server.cc +++ b/plugins/slice/server.cc @@ -23,6 +23,7 @@ #include "HttpHeader.h" #include "response.h" #include "transfer.h" +#include "ts/apidefs.h" #include "util.h" #include @@ -87,6 +88,31 @@ enum HeaderState { Passthru, }; +static void +update_object_size(TSHttpTxn txnp, int64_t size, Config &config) +{ + int urllen = 0; + char *urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &urllen); + if (urlstr != nullptr) { + if (size <= 0) { + DEBUG_LOG("Ignoring invalid content length for %.*s: %" PRId64, urllen, urlstr, size); + return; + } + + if (static_cast(size) >= config.m_min_size_to_slice) { + config.sizeCacheAdd({urlstr, static_cast(urllen)}, static_cast(size)); + TSStatIntIncrement(config.stat_TP, 1); + } else { + config.sizeCacheRemove({urlstr, static_cast(urllen)}); + TSStatIntIncrement(config.stat_FP, 1); + } + + TSfree(urlstr); + } else { + ERROR_LOG("Could not get URL from transaction."); + } +} + HeaderState handleFirstServerHeader(Data *const data, TSCont const contp) { @@ -121,6 +147,7 @@ handleFirstServerHeader(Data *const data, TSCont const contp) } DEBUG_LOG("Passthru bytes: header: %" PRId64 " body: %" PRId64, hlen, clen); if (clen != INT64_MAX) { + update_object_size(data->m_txnp, clen, *data->m_config); TSVIONBytesSet(output_vio, hlen + clen); } else { TSVIONBytesSet(output_vio, clen); @@ -140,6 +167,8 @@ handleFirstServerHeader(Data *const data, TSCont const contp) return HeaderState::Fail; } + update_object_size(data->m_txnp, blockcr.m_length, *data->m_config); + // set the resource content length from block response data->m_contentlen = blockcr.m_length; diff --git a/plugins/slice/slice.cc b/plugins/slice/slice.cc index 0b62658262a..515a0ed81dd 100644 --- a/plugins/slice/slice.cc +++ b/plugins/slice/slice.cc @@ -23,18 +23,45 @@ #include "HttpHeader.h" #include "intercept.h" +#include "ts/apidefs.h" #include "ts/remap.h" #include "ts/ts.h" +#include #include -#include +#include +#include namespace { +using namespace std::string_view_literals; +constexpr std::string_view SKIP_CRR_HDR_NAME = "X-Skip-Crr"sv; +constexpr std::string_view SKIP_CRR_HDR_VALUE = "-"sv; + +struct PluginInfo { + Config config; + TSCont read_resp_hdr_contp; +}; + Config globalConfig; +TSCont global_read_resp_hdr_contp; + +static bool +should_skip_this_obj(TSHttpTxn txnp, Config *const config) +{ + int len = 0; + char *const urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &len); + + if (!config->isKnownLargeObj({urlstr, static_cast(len)})) { + DEBUG_LOG("Not a known large object, not slicing: %.*s", len, urlstr); + return true; + } + + return false; +} bool -read_request(TSHttpTxn txnp, Config *const config) +read_request(TSHttpTxn txnp, Config *const config, TSCont read_resp_hdr_contp) { DEBUG_LOG("slice read_request"); TxnHdrMgr hdrmgr; @@ -66,11 +93,6 @@ read_request(TSHttpTxn txnp, Config *const config) } } - // turn off any and all transaction caching (shouldn't matter) - TSHttpTxnCntlSet(txnp, TS_HTTP_CNTL_SERVER_NO_STORE, true); - TSHttpTxnCntlSet(txnp, TS_HTTP_CNTL_RESPONSE_CACHEABLE, false); - TSHttpTxnCntlSet(txnp, TS_HTTP_CNTL_REQUEST_CACHEABLE, false); - DEBUG_LOG("slice accepting and slicing"); // connection back into ATS sockaddr const *const ip = TSHttpTxnClientAddrGet(txnp); @@ -79,7 +101,7 @@ read_request(TSHttpTxn txnp, Config *const config) } TSAssert(nullptr != config); - Data *const data = new Data(config); + std::unique_ptr data = std::make_unique(config); data->m_method_type = header.method(); data->m_txnp = txnp; @@ -90,7 +112,6 @@ read_request(TSHttpTxn txnp, Config *const config) } else if (AF_INET6 == ip->sa_family) { memcpy(&data->m_client_ip, ip, sizeof(sockaddr_in6)); } else { - delete data; return false; } @@ -98,7 +119,21 @@ read_request(TSHttpTxn txnp, Config *const config) data->m_hostlen = sizeof(data->m_hostname) - 1; if (!header.valueForKey(TS_MIME_FIELD_HOST, TS_MIME_LEN_HOST, data->m_hostname, &data->m_hostlen)) { DEBUG_LOG("Unable to get hostname from header"); - delete data; + return false; + } + + // check if object is large enough to slice - skip small and unknown size objects + if (should_skip_this_obj(txnp, config)) { + TSHttpTxnHookAdd(txnp, TS_HTTP_READ_RESPONSE_HDR_HOOK, read_resp_hdr_contp); + TSHttpTxnHookAdd(txnp, TS_HTTP_CACHE_LOOKUP_COMPLETE_HOOK, read_resp_hdr_contp); + + // If the client sends a range request, and we don't slice, the range request goes to cache_range_requests, and can be + // cached as a range request. We don't want that, and instead want to skip CRR entirely. + if (header.hasKey(TS_MIME_FIELD_RANGE, TS_MIME_LEN_RANGE)) { + HttpHeader mutable_header(hdrmgr.m_buffer, hdrmgr.m_lochdr); + mutable_header.setKeyVal(SKIP_CRR_HDR_NAME.data(), SKIP_CRR_HDR_NAME.length(), SKIP_CRR_HDR_VALUE.data(), + SKIP_CRR_HDR_VALUE.length()); + } return false; } @@ -118,7 +153,6 @@ read_request(TSHttpTxn txnp, Config *const config) if (TS_SUCCESS != rcode) { ERROR_LOG("Error cloning pristine url"); TSMBufferDestroy(newbuf); - delete data; return false; } @@ -152,7 +186,6 @@ read_request(TSHttpTxn txnp, Config *const config) TSHandleMLocRelease(newbuf, nullptr, newloc); } TSMBufferDestroy(newbuf); - delete data; return false; } @@ -174,7 +207,7 @@ read_request(TSHttpTxn txnp, Config *const config) // we'll intercept this GET and do it ourselves TSMutex const mutex = TSContMutexGet(reinterpret_cast(txnp)); TSCont const icontp(TSContCreate(intercept_hook, mutex)); - TSContDataSet(icontp, (void *)data); + TSContDataSet(icontp, data.release()); // Skip remap and remap rule requirement TSHttpTxnCntlSet(txnp, TS_HTTP_CNTL_SKIP_REMAPPING, true); @@ -191,6 +224,60 @@ read_request(TSHttpTxn txnp, Config *const config) return false; } +static int +read_resp_hdr(TSCont contp, TSEvent event, void *edata) +{ + TSHttpTxn txnp = static_cast(edata); + PluginInfo *info = static_cast(TSContDataGet(contp)); + + // This function does the following things: + // 1. Parse the object size from Content-Length + // 2. Cache the object size + // 3. If the object will be sliced in subsequent requests, turn off the cache to avoid taking up space, and head-of-line blocking. + + int urllen = 0; + char *urlstr = TSHttpTxnEffectiveUrlStringGet(txnp, &urllen); + if (urlstr != nullptr) { + TxnHdrMgr response; + TxnHdrMgr::HeaderGetFunc func = event == TS_EVENT_HTTP_CACHE_LOOKUP_COMPLETE ? TSHttpTxnCachedRespGet : TSHttpTxnServerRespGet; + response.populateFrom(txnp, func); + HttpHeader const resp_header(response.m_buffer, response.m_lochdr); + char constr[1024]; + int conlen = sizeof constr; + bool const hasContentLength(resp_header.valueForKey(TS_MIME_FIELD_CONTENT_LENGTH, TS_MIME_LEN_CONTENT_LENGTH, constr, &conlen)); + if (hasContentLength) { + uint64_t content_length; + + [[maybe_unused]] auto [ptr, ec] = std::from_chars(constr, constr + conlen, content_length); + if (ec == std::errc()) { + if (content_length >= info->config.m_min_size_to_slice) { + // Remember that this object is big + info->config.sizeCacheAdd({urlstr, static_cast(urllen)}, content_length); + // This object will be sliced in future requests. Don't cache it for now. + TSHttpTxnServerRespNoStoreSet(txnp, 1); + TSStatIntIncrement(info->config.stat_FN, 1); + } else { + TSStatIntIncrement(info->config.stat_TN, 1); + } + } else { + ERROR_LOG("Could not parse content-length: %.*s", conlen, constr); + TSStatIntIncrement(info->config.stat_bad_cl, 1); + } + } else { + DEBUG_LOG("Could not get a content length for updating object size"); + TSStatIntIncrement(info->config.stat_no_cl, 1); + } + TSfree(urlstr); + } else { + ERROR_LOG("Could not get URL for obj size."); + TSStatIntIncrement(info->config.stat_no_url, 1); + } + + // Reenable and continue with the state machine. + TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE); + return 0; +} + int global_read_request_hook(TSCont // contp , @@ -199,7 +286,7 @@ global_read_request_hook(TSCont // contp void *edata) { TSHttpTxn const txnp = static_cast(edata); - read_request(txnp, &globalConfig); + read_request(txnp, &globalConfig, global_read_resp_hdr_contp); TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE); return 0; } @@ -216,9 +303,9 @@ DbgCtl dbg_ctl{PLUGIN_NAME}; TSRemapStatus TSRemapDoRemap(void *ih, TSHttpTxn txnp, TSRemapRequestInfo * /* rri ATS_UNUSED */) { - Config *const config = static_cast(ih); + PluginInfo *const info = static_cast(ih); - if (read_request(txnp, config)) { + if (read_request(txnp, &info->config, info->read_resp_hdr_contp)) { return TSREMAP_DID_REMAP_STOP; } else { return TSREMAP_NO_REMAP; @@ -231,12 +318,61 @@ TSRemapOSResponse(void * /* ih ATS_UNUSED */, TSHttpTxn /* rh ATS_UNUSED */, int { } +static bool +register_stat(const char *name, int &id) +{ + if (TSStatFindName(name, &id) == TS_ERROR) { + id = TSStatCreate(name, TS_RECORDDATATYPE_INT, TS_STAT_NON_PERSISTENT, TS_STAT_SYNC_SUM); + if (id == TS_ERROR) { + ERROR_LOG("Failed to register stat '%s'", name); + return false; + } + } + + DEBUG_LOG("[%s] %s registered with id %d", PLUGIN_NAME, name, id); + + return true; +} + +static void +init_stats(Config &config, const std::string &prefix) +{ + const std::array, 7> stats{ + { + {".metadata_cache.true_large_objects", config.stat_TP}, + {".metadata_cache.true_small_objects", config.stat_TN}, + {".metadata_cache.false_large_objects", config.stat_FP}, + {".metadata_cache.false_small_objects", config.stat_FN}, + {".metadata_cache.no_content_length", config.stat_no_cl}, + {".metadata_cache.bad_content_length", config.stat_bad_cl}, + {".metadata_cache.no_url", config.stat_no_url}, + } + }; + + config.stats_enabled = true; + for (const auto &stat : stats) { + const std::string name = std::string{PLUGIN_NAME "."} + prefix + stat.first; + config.stats_enabled &= register_stat(name.c_str(), stat.second); + } +} + TSReturnCode TSRemapNewInstance(int argc, char *argv[], void **ih, char * /* errbuf */, int /* errbuf_size */) { - Config *const config = new Config; - config->fromArgs(argc - 2, argv + 2); - *ih = static_cast(config); + PluginInfo *const info = new PluginInfo; + + info->config.fromArgs(argc - 2, argv + 2); + + TSCont read_resp_hdr_contp = TSContCreate(read_resp_hdr, nullptr); + TSContDataSet(read_resp_hdr_contp, static_cast(info)); + info->read_resp_hdr_contp = read_resp_hdr_contp; + + if (!info->config.stat_prefix.empty()) { + init_stats(info->config, info->config.stat_prefix); + } + + *ih = static_cast(info); + return TS_SUCCESS; } @@ -244,8 +380,9 @@ void TSRemapDeleteInstance(void *ih) { if (nullptr != ih) { - Config *const config = static_cast(ih); - delete config; + PluginInfo *const info = static_cast(ih); + TSContDestroy(info->read_resp_hdr_contp); + delete info; } } @@ -273,8 +410,12 @@ TSPluginInit(int argc, char const *argv[]) globalConfig.fromArgs(argc - 1, argv + 1); - TSCont const contp(TSContCreate(global_read_request_hook, nullptr)); + TSCont const global_read_request_contp(TSContCreate(global_read_request_hook, nullptr)); + global_read_resp_hdr_contp = TSContCreate(read_resp_hdr, nullptr); // Called immediately after the request header is read from the client - TSHttpHookAdd(TS_HTTP_READ_REQUEST_HDR_HOOK, contp); + TSHttpHookAdd(TS_HTTP_READ_REQUEST_HDR_HOOK, global_read_request_contp); + + // Register stats for metadata cache + init_stats(globalConfig, "global"); } diff --git a/plugins/slice/unit-tests/CMakeLists.txt b/plugins/slice/unit-tests/CMakeLists.txt index 9ddec81ee6f..b616af17a74 100644 --- a/plugins/slice/unit-tests/CMakeLists.txt +++ b/plugins/slice/unit-tests/CMakeLists.txt @@ -25,7 +25,12 @@ target_compile_definitions(test_range PRIVATE UNITTEST) target_link_libraries(test_range PRIVATE catch2::catch2 ts::tsutil) add_test(NAME test_range COMMAND test_range) -add_executable(test_config test_config.cc ${PROJECT_SOURCE_DIR}/Config.cc) +add_executable(test_config test_config.cc ${PROJECT_SOURCE_DIR}/Config.cc ${PROJECT_SOURCE_DIR}/ObjectSizeCache.cc) target_compile_definitions(test_config PRIVATE UNITTEST) target_link_libraries(test_config PRIVATE PCRE::PCRE catch2::catch2 ts::tsutil) add_test(NAME test_config COMMAND test_config) + +add_executable(test_cache test_cache.cc ${PROJECT_SOURCE_DIR}/ObjectSizeCache.cc) +target_compile_definitions(test_cache PRIVATE UNITTEST) +target_link_libraries(test_cache PRIVATE catch2::catch2 ts::tsutil) +add_test(NAME test_cache COMMAND test_cache) diff --git a/plugins/slice/unit-tests/test_cache.cc b/plugins/slice/unit-tests/test_cache.cc new file mode 100644 index 00000000000..2c744337519 --- /dev/null +++ b/plugins/slice/unit-tests/test_cache.cc @@ -0,0 +1,173 @@ +/** @file test_cache.cc + + Unit tests for metadata cache + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include +#include +#include +#include +#include +#define CATCH_CONFIG_MAIN /* include main function */ +#include "catch.hpp" /* catch unit-test framework */ +#include "../ObjectSizeCache.h" + +using namespace std::string_view_literals; +TEST_CASE("cache miss", "[slice][metadatacache]") +{ + ObjectSizeCache cache{1024}; + std::optional res = cache.get("example.com"sv); + CHECK(res == std::nullopt); +} + +TEST_CASE("cache hit", "[slice][metadatacache]") +{ + ObjectSizeCache cache{1024}; + cache.set("example.com/123"sv, 123); + std::optional res2 = cache.get("example.com/123"sv); + CHECK(res2.value() == 123); +} + +TEST_CASE("cache remove", "[slice][metadatacache]") +{ + ObjectSizeCache cache{1024}; + cache.set("example.com/123"sv, 123); + std::optional res2 = cache.get("example.com/123"sv); + CHECK(res2.value() == 123); + cache.remove("example.com/123"sv); + std::optional res3 = cache.get("example.com/123"sv); + REQUIRE(!res3.has_value()); + REQUIRE(cache.cache_count() == 0); + REQUIRE(cache.cache_capacity() == 1024); +} + +TEST_CASE("eviction", "[slice][metadatacache]") +{ + constexpr int cache_size = 10; + ObjectSizeCache cache{cache_size}; + for (uint64_t i = 0; i < cache_size * 100; i++) { + std::stringstream ss; + ss << "http://example.com/" << i; + cache.set(ss.str(), i); + } + size_t found = 0; + for (uint64_t i = 0; i < cache_size * 100; i++) { + std::stringstream ss; + ss << "http://example.com/" << i; + std::optional size = cache.get(ss.str()); + if (size.has_value()) { + CHECK(size.value() == i); + found++; + } + } + REQUIRE(found == cache_size); +} + +TEST_CASE("tiny cache", "[slice][metadatacache]") +{ + constexpr int cache_size = 1; + ObjectSizeCache cache{cache_size}; + for (uint64_t i = 0; i < cache_size * 100; i++) { + std::stringstream ss; + ss << "http://example.com/" << i; + cache.set(ss.str(), i); + } + size_t found = 0; + for (uint64_t i = 0; i < cache_size * 100; i++) { + std::stringstream ss; + ss << "http://example.com/" << i; + std::optional size = cache.get(ss.str()); + if (size.has_value()) { + CHECK(size.value() == i); + found++; + } + } + REQUIRE(found == cache_size); +} + +TEST_CASE("hit rate", "[slice][metadatacache]") +{ + constexpr int cache_size = 10; + ObjectSizeCache cache{cache_size}; + std::mt19937 gen; + std::poisson_distribution d{cache_size}; + std::atomic hits{0}, misses{0}; + + for (uint64_t i = 0; i < cache_size * 100; i++) { + std::stringstream ss; + uint64_t obj = d(gen); + + ss << "http://example.com/" << obj; + std::optional size = cache.get(ss.str()); + if (size.has_value()) { + CHECK(size.value() == obj); + hits++; + } else { + cache.set(ss.str(), obj); + misses++; + } + } + + INFO("Hits: " << hits); + INFO("Misses: " << misses); + REQUIRE(hits > cache_size * 50); +} + +TEST_CASE("threads", "[slice][metadatacache]") +{ + constexpr int cache_size = 10; + ObjectSizeCache cache{cache_size}; + + std::mt19937 gen; + std::poisson_distribution d{cache_size}; + std::vector threads; + std::atomic hits{0}, misses{0}; + + auto runfunc = [&]() { + for (uint64_t i = 0; i < cache_size * 100; i++) { + std::stringstream ss; + uint64_t obj = d(gen); + + ss << "http://example.com/" << obj; + std::optional size = cache.get(ss.str()); + if (size.has_value()) { + CHECK(size.value() == obj); + hits++; + } else { + cache.set(ss.str(), obj); + misses++; + } + } + }; + + for (int i = 0; i < 4; i++) { + threads.emplace_back(runfunc); + } + + for (auto &t : threads) { + t.join(); + } + INFO("Hits: " << hits); + INFO("Misses: " << misses); + REQUIRE(hits > cache_size * 50 * 4); + REQUIRE(cache.cache_count() == cache_size); + REQUIRE(cache.cache_capacity() == cache_size); +} diff --git a/tests/gold_tests/pluginTest/slice/slice_conditional.test.py b/tests/gold_tests/pluginTest/slice/slice_conditional.test.py new file mode 100644 index 00000000000..fe39152979d --- /dev/null +++ b/tests/gold_tests/pluginTest/slice/slice_conditional.test.py @@ -0,0 +1,156 @@ +''' +''' +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Test.Summary = ''' +Conditional Slicing test +''' + +# Test description: +# Preload the cache with the entire asset to be range requested. +# Reload remap rule with slice plugin +# Request content through the slice plugin + +Test.SkipUnless( + Condition.PluginExists('slice.so'), + Condition.PluginExists('cache_range_requests.so'), + Condition.PluginExists('xdebug.so'), +) +Test.ContinueOnFail = False + +# configure origin server +server = Test.MakeOriginServer("server", lookup_key="{PATH}{%Range}", options={'-v': None}) + +# Define ATS and configure +ts = Test.MakeATSProcess("ts") + +# small object, should not be sliced +req_small = { + "headers": "GET /small HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "\r\n", + "body": "", +} +res_small = { + "headers": "HTTP/1.1 200 OK\r\n" + "Connection: close\r\n" + "Cache-Control: max-age=10,public\r\n" + "\r\n", + "body": "smol", +} +server.addResponse("sessionlog.json", req_small, res_small) + +# large object, all in one slice +req_large = { + "headers": "GET /large HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "\r\n", + "body": "", +} +res_large = { + "headers": "HTTP/1.1 200 OK\r\n" + "Connection: close\r\n" + "Cache-Control: max-age=10,public\r\n" + "\r\n", + "body": "unsliced large object!" +} +server.addResponse("sessionlog.json", req_large, res_large) + +# large object, this populates the individual slices in the server + +large_body = "large object sliced!" +body_len = len(large_body) +slice_begin = 0 +slice_block_size = 10 +while (slice_begin < body_len): + slice_end = slice_begin + slice_block_size + req_large_slice = { + "headers": "GET /large HTTP/1.1\r\n" + "Host: www.example.com\r\n" + f"Range: bytes={slice_begin}-{slice_end - 1}" + "\r\n", + "body": "", + } + if slice_end > body_len: + slice_end = body_len + res_large_slice = { + "headers": + "HTTP/1.1 206 Partial Content\r\n" + "Connection: close\r\n" + "Accept-Ranges: bytes\r\n" + + f"Content-Range: bytes {slice_begin}-{slice_end - 1}/{body_len}\r\n" + "Cache-Control: max-age=10,public\r\n" + "\r\n", + "body": large_body[slice_begin:slice_end] + } + server.addResponse("sessionlog.json", req_large_slice, res_large_slice) + slice_begin += slice_block_size + +# set up slice plugin with remap host into cache_range_requests +ts.Disk.remap_config.AddLines( + [ + f'map http://slice/ http://127.0.0.1:{server.Variables.Port}/' + + f' @plugin=slice.so @pparam=--blockbytes-test={slice_block_size} @pparam=--minimum-size=8 @pparam=--metadata-cache-size=4 @plugin=cache_range_requests.so' + ]) + +ts.Disk.plugin_config.AddLine('xdebug.so --enable=x-cache') +ts.Disk.records_config.update( + { + 'proxy.config.diags.debug.enabled': '0', + 'proxy.config.diags.debug.tags': 'http|cache|slice|xdebug|cache_range_requests', + }) + +curl_and_args = 'curl -s -D /dev/stdout -o /dev/stderr -x localhost:{}'.format(ts.Variables.port) + ' -H "x-debug: x-cache"' + +# Test case: first request of small object +tr = Test.AddTestRun("Small request 1") +ps = tr.Processes.Default +ps.StartBefore(server, ready=When.PortOpen(server.Variables.Port)) +ps.StartBefore(Test.Processes.ts) +ps.Command = curl_and_args + ' http://slice/small' +ps.ReturnCode = 0 +ps.Streams.stderr.Content = Testers.ContainsExpression('smol', 'expected smol') +ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: miss', 'expected cache miss') +tr.StillRunningAfter = ts + +# Test case: second request of small object - expect cache hit +tr = Test.AddTestRun("Small request 2") +ps = tr.Processes.Default +ps.Command = curl_and_args + ' http://slice/small' +ps.ReturnCode = 0 +ps.Streams.stderr.Content = Testers.ContainsExpression('smol', 'expected smol') +ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: hit-fresh', 'expected cache hit-fresh') +tr.StillRunningAfter = ts + +# Test case: range request of small object - expect cache hit (proxy.config.http.cache.range.lookup = 1) +tr = Test.AddTestRun("Small request - ranged") +ps = tr.Processes.Default +ps.Command = curl_and_args + ' -r 1-2 http://slice/small' +ps.ReturnCode = 0 +ps.Streams.stderr.Content = Testers.ContainsExpression('mo', 'expected mo') +ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: hit-fresh', 'expected cache hit-fresh') +tr.StillRunningAfter = ts + +# Test case: first request of large object - expect unsliced, cache write disabled +tr = Test.AddTestRun("Large request 1") +ps = tr.Processes.Default +ps.Command = curl_and_args + ' http://slice/large' +ps.ReturnCode = 0 +ps.Streams.stderr.Content = Testers.ContainsExpression('unsliced large object!', 'expected large object') +ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: miss', 'expected cache miss') +tr.StillRunningAfter = ts + +# Test case: first request of large object - expect sliced, cache miss +tr = Test.AddTestRun("Large request 2") +ps = tr.Processes.Default +ps.Command = curl_and_args + ' http://slice/large' +ps.ReturnCode = 0 +ps.Streams.stderr.Content = Testers.ContainsExpression('large object sliced!', 'expected large object') +ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: miss', 'expected cache miss') +tr.StillRunningAfter = ts + +## Test case: first request of large object - expect cache hit +tr = Test.AddTestRun("Large request 3") +ps = tr.Processes.Default +ps.Command = curl_and_args + ' http://slice/large' +ps.ReturnCode = 0 +ps.Streams.stderr.Content = Testers.ContainsExpression('large object sliced!', 'expected large object') +ps.Streams.stdout.Content = Testers.ContainsExpression('X-Cache: hit-fresh', 'expected cache hit-fresh') +tr.StillRunningAfter = ts