From 473dc3508423272d0800f49ccbd347047a80f605 Mon Sep 17 00:00:00 2001 From: "Alan M. Carroll" Date: Fri, 2 Dec 2016 11:51:06 -0600 Subject: [PATCH] MemView: A fast and efficient string and memory reference library. --- doc/developer-guide/architecture/index.en.rst | 2 +- .../architecture/memview.en.rst | 165 +++ lib/ts/Makefile.am | 7 +- lib/ts/MemView.cc | 121 ++ lib/ts/MemView.h | 1270 +++++++++++++++++ lib/ts/test_MemView.cc | 52 + 6 files changed, 1614 insertions(+), 3 deletions(-) create mode 100644 doc/developer-guide/architecture/memview.en.rst create mode 100644 lib/ts/MemView.cc create mode 100644 lib/ts/MemView.h create mode 100644 lib/ts/test_MemView.cc diff --git a/doc/developer-guide/architecture/index.en.rst b/doc/developer-guide/architecture/index.en.rst index 675265635d2..801631aee2e 100644 --- a/doc/developer-guide/architecture/index.en.rst +++ b/doc/developer-guide/architecture/index.en.rst @@ -40,4 +40,4 @@ understanding and modifying the source. consistency.en ram-cache.en tiered-storage.en - + memview.en diff --git a/doc/developer-guide/architecture/memview.en.rst b/doc/developer-guide/architecture/memview.en.rst new file mode 100644 index 00000000000..e1524e0d422 --- /dev/null +++ b/doc/developer-guide/architecture/memview.en.rst @@ -0,0 +1,165 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. + +.. include:: ../../common.defs + +.. default-domain:: cpp + +MemView +************* + +Synopsis +======== + +:code:`#include <ts/MemView.h>` + +.. class:: MemView + +.. class:: StringView + +These classes act as views in to already allocated memory. Internally in |TS| work must be done with +string or memory entities that are embedded in larger pre-existing memory structures. These classes +are designed to make that easier, more efficient, and less error prone. + +Description +=========== + +The term "view" will be used to mean an instance of :class:`MemView` or :class:`StringView`. +Fundamentally both classes do the same thing, maintain a read only view of a contiguous region of +memory. They differ in the methods and return types due to the conflicting requirements of raw +memory operations and string based operations. + +A view is constructed by providing a contiguous region of memory, either based on a start pointer +and a length or a pair of pointers in the usual STL half open range style where the view starts at +the first pointer and ends one short of the second pointer. A view can be empty and refer to no +memory (which is what default construction yields). A view attempts to act like a normal pointer in +most situations. A view is only somewhat more expensive than a raw pointer but in most cases a count +is needed as well making a view not any more costly than existing code. Any code that already keeps +a pointer and a count is a good candidate for using :class:`MemView` or :class:`StringView`. + +:class:`MemView` and :class:`StringView` inter-convert because the difference between them is simply +the API to access the underlying memory in the view, the actual class internal data is identical. + +:class:`StringView` provides a variety of methods for manipulating the view as a string. 
These are provided as families of overloads differentiated by how characters are compared. There are four flavors. + +* Direct, a pointer to the target character. +* Comparison, an explicit character value to compare. +* Set, a set of characters (described by a :class:`StringView`) which are compared, any one of which matches. +* Predicate, a function that takes a single character argument and returns a bool to indicate a match. + +If the latter three are inadequate, the first, the direct pointer, can be used after finding the +appropriate character through some other mechanism. + +The increment operator for :class:`StringView` shrinks the view by one character from the front +which allows stepping through the view in the normal way, although the string view itself should be the +loop condition, not a dereference of it. + +.. code-block:: cpp + + StringView v; + size_t hash = 0; + for ( ; v ; ++v) hash = hash * 13 + *v; + +Or, because the view acts as a container of characters, this can be done non-destructively. + +.. code-block:: cpp + + StringView v; + size_t hash = 0; + for (char c : v) hash = hash * 13 + c; + +Views are cheap to construct therefore making a copy to use destructively is very inexpensive. + +:class:`MemView` provides a :code:`find` method that searches for a matching value. The type of this +value can be anything that is fixed sized and supports the equality operator. The view is treated as +an array of the type and searched sequentially for a matching value. The value type is treated as +having no identity and cheap to copy, in the manner of an integral type. + +Parsing with StringView +----------------------- + +A primary use of :class:`StringView` is to do field oriented parsing. It is easy and fast to split +strings in to fields without modifying the original data. For example, assume that :arg:`value` +contains a null terminated string which is possibly several tokens separated by commas. + +.. 
code-block:: cpp + + #include <ts/MemView.h> + void parse_token(const char* value) { + StringView v(value); // construct assuming null terminated string. + while (v) { + StringView token(v.extractPrefix(',')); + token.trim(&isspace); + if (token) { + // process token + } + } + } + +If :arg:`value` was ``bob ,dave, sam`` then :arg:`token` would be successively ``bob``, ``dave``, +``sam``. After ``sam`` was extracted the view :code:`v` would be empty and the loop would exit. :arg:`token` +can be empty in the case of adjacent delimiters or a trailing delimiter. Note that no memory +allocation at all is done because each view is a pointer in to :arg:`value` and there is no need to +put nul characters in the source string meaning no need to duplicate it to prevent permanent +changes. + +What if the tokens were key / value pairs, of the form `key=value`? This can be done as in the following example. + +.. code-block:: cpp + + #include <ts/MemView.h> + void parse_token(const char* source) { + StringView in(source); // construct assuming null terminated string. + while (in) { + StringView value(in.extractPrefix(',')); + StringView key(value.trim(&isspace).splitPrefix('=').rtrim(&isspace)); + if (key) { + // it's a key=value token with key and value set appropriately. + value.ltrim(&isspace); // clip potential space after '='. + } else { + // it's just a single token which is in value. + } + } + } + +Nested delimiters are handled by further splitting in a recursive way which, because the original +string is never modified, is straight forward. + +History +======= + +The first attempt at this functionality was in the TSConfig library in the :code:`ts::Buffer` and +:code:`ts::ConstBuffer` classes. Originally intended just as raw memory views, +:code:`ts::ConstBuffer` in particular was repeatedly enhanced to provide better support for strings. +The header was eventually moved from :literal:`lib/tsconfig` to :literal:`lib/ts` and was used in +various parts of the |TS| core. 
+ +There was then a proposal to make these classes available to plugin writers as they proved handy in +the core. A suggested alternative was `Boost.StringRef +`_ which provides a +similar functionality using :code:`std::string` as the base of the pre-allocated memory. A version +of the header was ported to |TS| (by stripping all the Boost support and cross includes) but in use +proved to provide little of the functionality available in :code:`ts::ConstBuffer`. If extensive +reworking was required in any case, it seemed better to start from scratch and build just what was +useful in the |TS| context. + +The next step was the :code:`StringView` class which turned out reasonably well. It was then +suggested that more support for raw memory (as opposed to memory presumed to contain printable ASCII +data) would be useful. An attempt was made to do this but the differences in arguments, subtle +method differences, and return types made that infeasible. Instead :class:`MemView` was split off to +provide a :code:`void*` oriented view. String specific methods were stripped out and a few +non-character based methods added. 
diff --git a/lib/ts/Makefile.am b/lib/ts/Makefile.am index 65acd0060ac..18f387f8440 100644 --- a/lib/ts/Makefile.am +++ b/lib/ts/Makefile.am @@ -23,7 +23,7 @@ library_includedir=$(includedir)/ts library_include_HEADERS = apidefs.h noinst_PROGRAMS = mkdfa CompileParseRules -check_PROGRAMS = test_tsutil test_arena test_atomic test_freelist test_geometry test_List test_Map test_Vec test_X509HostnameValidator +check_PROGRAMS = test_tsutil test_arena test_atomic test_freelist test_geometry test_List test_Map test_Vec test_X509HostnameValidator test_MemView TESTS_ENVIRONMENT = LSAN_OPTIONS=suppressions=suppression.txt @@ -189,6 +189,7 @@ libtsutil_la_SOURCES = \ lockfile.cc \ signals.cc \ signals.h \ + MemView.h MemView.cc \ X509HostnameValidator.cc \ X509HostnameValidator.h @@ -235,6 +236,9 @@ test_tsutil_SOURCES = \ test_Regex.cc \ tests.cc +test_MemView_SOURCES = test_MemView.cc +test_MemView_LDADD = libtsutil.la + CompileParseRules_SOURCES = CompileParseRules.cc clean-local: @@ -242,4 +246,3 @@ clean-local: tidy-local: $(DIST_SOURCES) $(CXX_Clang_Tidy) - 
diff --git a/lib/ts/MemView.cc b/lib/ts/MemView.cc
new file mode 100644
index 00000000000..df842b786d0
--- /dev/null
+++ b/lib/ts/MemView.cc
@@ -0,0 +1,121 @@
+#include <ts/MemView.h>
+#include <sstream>
+#include <ctype.h>
+
+namespace ApacheTrafficServer
+{
+/** Compare the bytes referenced by two memory views.
+
+    The leading bytes common to both views are compared with @c ::memcmp. If those are
+    identical the shorter view orders first, the same rule @c strcasecmp below applies.
+
+    @return A negative value if @a lhs orders before @a rhs, zero if both views have the
+    same size and content, a positive value otherwise.
+ */
+int
+memcmp(MemView const &lhs, MemView const &rhs)
+{
+  int zret;
+  size_t n;
+
+  // Seems a bit ugly but size comparisons must be done anyway to get the memcmp args.
+  if (lhs.size() < rhs.size())
+    zret = -1, n = lhs.size();
+  else {
+    n    = rhs.size();
+    zret = rhs.size() < lhs.size() ? 1 : 0;
+  }
+
+  // Skip the library call when there is nothing in common to compare; an empty view
+  // may carry a null pointer, which must not be handed to ::memcmp.
+  int r = n ? ::memcmp(lhs.ptr(), rhs.ptr(), n) : 0;
+  if (0 != r) // If we got a not-equal, override the size based result.
+    zret = r;
+
+  return zret;
+}
+
+/** Compare two string views ignoring case.
+
+    Both views are taken by value and consumed from the front while comparing.
+
+    @return -1 if @a lhs orders before @a rhs, 1 if it orders after, 0 if the views are
+    equal without regard to case. If one view is a prefix of the other the shorter
+    view orders first.
+ */
+int
+strcasecmp(StringView lhs, StringView rhs)
+{
+  while (lhs && rhs) {
+    // tolower() requires a value representable as unsigned char.
+    char l = tolower(static_cast<unsigned char>(*lhs));
+    char r = tolower(static_cast<unsigned char>(*rhs));
+    if (l < r)
+      return -1;
+    else if (r < l)
+      return 1;
+    ++lhs, ++rhs;
+  }
+  return lhs ? 1 : rhs ? -1 : 0;
+}
+
+/** Convert the leading text of @a src to an integer.
+
+    Leading white space is skipped, an optional '-' is accepted and then characters are
+    consumed for as long as they are valid digits in @a base.
+
+    @param src Text to parse.
+    @param out If not @c nullptr it is cleared and then, if at least one digit was
+    consumed, set to the text that was parsed (including a leading '-').
+    @param base Conversion base, which must be in the range 2..36.
+
+    @return The converted value, or 0 if @a base is out of range or no digit was found.
+
+    @note Overflow is not detected.
+ */
+intmax_t
+svtoi(StringView src, StringView *out, int base)
+{
+  // Digit value for each possible byte, -1 for bytes that are not a digit in any base.
+  static const int8_t convert[256] = {
+    //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20
+    0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  -1, -1, -1, -1, -1, -1, // 30
+    -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40
+    25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, // 50
+    -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60
+    25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, // 70
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 80
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 90
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // A0
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // B0
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // C0
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // D0
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // E0
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // F0
+  };
+
+  intmax_t zret = 0;
+
+  // @a out is optional - test the pointer, not the view it points at.
+  if (out)
+    out->clear();
+  if (!(1 < base && base <= 36))
+    return 0;
+  if (src.ltrim(&isspace)) {
+    const char *start = src.ptr();
+    int8_t v;
+    bool neg = false;
+    if ('-' == *src) {
+      ++src;
+      neg = true;
+    }
+    // Stop at the first byte that is not a digit or is not a digit in this base.
+    while (src.size() && (-1 != (v = convert[static_cast<unsigned char>(*src)])) && v < base) {
+      zret = zret * base + v;
+      ++src;
+    }
+    // Only report parsed text if at least one digit followed the optional sign.
+    if (out && (src.ptr() > (neg ? start + 1 : start))) {
+      out->setView(start, src.ptr());
+    }
+
+    if (neg)
+      zret = -zret;
+  }
+  return zret;
+}
+
+// Do the template instantiations.
+// Explicit instantiations of the stream helpers for std::ostream.
+template void detail::stream_fill(std::ostream &, std::size_t);
+template std::ostream &StringView::stream_write(std::ostream &, const StringView &) const;
+}
+
+// NOTE(review): these overloads are declared inside namespace std - confirm that is intended.
+namespace std
+{
+// Write a MemView as "<size>@<pointer>".
+// The text is formatted in a scratch ostringstream, so the `hex` manipulator is applied
+// to the scratch stream and not to @a os. Nothing is written if @a os is not good().
+ostream &
+operator<<(ostream &os, const ApacheTrafficServer::MemView &b)
+{
+  if (os.good()) {
+    ostringstream out;
+    out << b.size() << '@' << hex << b.ptr();
+    os << out.str();
+  }
+  return os;
+}
+
+// Write the characters of a StringView via StringView::stream_write and then reset the
+// field width of @a os to 0. Nothing is written if @a os is not good().
+ostream &
+operator<<(ostream &os, const ApacheTrafficServer::StringView &b)
+{
+  if (os.good()) {
+    b.stream_write(os, b);
+    os.width(0);
+  }
+  return os;
+}
+}
diff --git a/lib/ts/MemView.h b/lib/ts/MemView.h
new file mode 100644
index 00000000000..1d8ad30b907
--- /dev/null
+++ b/lib/ts/MemView.h
@@ -0,0 +1,1270 @@
+#if !defined TS_MEM_VIEW
+#define TS_MEM_VIEW
+
+/** @file
+
+   Class for handling "views" of a buffer. Views presume the memory for the buffer is managed
+   elsewhere and allow efficient access to segments of the buffer without copies. Views are read
+   only as the view doesn't own the memory. Along with generic buffer methods are specialized
+   methods to support better string parsing, particularly token based parsing.
+
+   @section license License
+
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements. See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership. The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License. You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+ */
+
+#include <algorithm>
+#include <bitset>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <functional>
+#include <iosfwd>
+#include <string>
+
+/// Apache Traffic Server commons.
+namespace ApacheTrafficServer
+{
+class MemView;
+class StringView;
+
+/// Compare the memory referenced by two views.
+int memcmp(MemView const &lhs, MemView const &rhs);
+/// Compare the text referenced by two views.
+int strcmp(StringView const &lhs, StringView const &rhs);
+/// Compare the text referenced by two views without regard to case.
+int strcasecmp(StringView lhs, StringView rhs);
+
+/** Convert the text in @c StringView @a src to a numeric value.
+
+    If @a parsed is non-null then the part of the string actually parsed is placed there.
+    @a base sets the conversion base. This defaults to 10 with two special cases:
+
+    - If the number starts with a literal '0' then it is treated as base 8.
+    - If the number starts with the literal characters '0x' or '0X' then it is treated as base 16.
+
+    @note NOTE(review): the definition of @c svtoi in MemView.cc in this change does not
+    implement the two prefix cases above - confirm which of the two is intended.
+*/
+intmax_t svtoi(StringView src, StringView *parsed = nullptr, int base = 10);
+
+/** A read only view of contiguous piece of memory.
+
+    A @c MemView does not own the memory to which it refers, it is simply a view of part of some
+    (presumably) larger memory object. The purpose is to allow working in a read only way a specific
+    part of the memory. This can avoid copying or allocation by allocating all needed memory at once
+    and then working with it via instances of this class.
+
+    MemView is based on an earlier class ConstBuffer and influenced by Boost.string_ref. Neither
+    of these were adequate for how use of @c ConstBuffer evolved and so @c MemView is @c
+    ConstBuffer with some additional stylistic changes based on Boost.string_ref.
+
+    This class is closely integrated with @c StringView. These classes have the same underlying
+    implementation and are differentiated only because of the return types and a few string oriented
+    methods.
+ */
+class MemView
+{
+  typedef MemView self; ///< Self reference type.
+
+protected:
+  const void *_ptr = nullptr; ///< Pointer to base of memory chunk.
+  size_t _size     = 0;       ///< Size of memory chunk.
+
+public:
+  /// Default constructor (empty buffer).
+  constexpr MemView();
+
+  /** Construct explicitly with a pointer and size.
+   */
+  constexpr MemView(const void *ptr, ///< Pointer to buffer.
+                    size_t n         ///< Size of buffer.
+                    );
+
+  /** Construct from a half open range of two pointers.
+      @note The byte at @a start is in the view but the byte at @a end is not.
+  */
+  constexpr MemView(const void *start, ///< First byte in the view.
+                    const void *end    ///< First byte not in the view.
+                    );
+
+  /** Construct from nullptr.
+      This implicitly makes the length 0.
+  */
+  constexpr MemView(std::nullptr_t);
+
+  /// Convert from StringView.
+  constexpr MemView(StringView const &that);
+
+  /** Equality.
+
+      This is effectively a pointer comparison, buffer contents are not compared.
+
+      @return @c true if @a that refers to the same view as @a this,
+      @c false otherwise.
+   */
+  bool operator==(self const &that) const;
+
+  /** Inequality.
+      @return @c true if @a that does not refer to the same view as @a this,
+      @c false otherwise.
+   */
+  bool operator!=(self const &that) const;
+
+  /// Assignment - the view is copied, not the content.
+  self &operator=(self const &that);
+
+  /** Shift the view to discard the first byte.
+      @return @a this.
+   */
+  self &operator++();
+
+  /** Shift the view to discard the leading @a n bytes.
+      @return @a this
+   */
+  self &operator+=(size_t n);
+
+  /// Check for empty view.
+  /// @return @c true if the view has a zero pointer @b or size.
+  bool operator!() const;
+
+  /// Check for non-empty view.
+  /// @return @c true if the view refers to a non-empty range of bytes.
+  explicit operator bool() const;
+
+  /// Check for empty view (no content).
+  /// @see operator bool
+  bool is_empty() const;
+
+  /// @name Accessors.
+  //@{
+  /// Pointer to the first byte in the view.
+  const void *begin() const;
+  /// Pointer to first byte not in the view.
+  const void *end() const;
+  /// Number of bytes in the view.
+  constexpr size_t size() const;
+  /// Memory pointer.
+  /// @note This is equivalent to @c begin currently but it's probably good to have separation.
+  constexpr const void *ptr() const;
+  /// @return the @a V value at index @a n.
+  template <typename V> V array(size_t n) const;
+  //@}
+
+  /// Set the view.
+  /// This is faster but equivalent to constructing a new view with the same
+  /// arguments and assigning it.
+  /// @return @c this.
+  self &setView(const void *ptr, ///< Buffer address.
+                size_t n = 0     ///< Buffer size.
+                );
+
+  /// Set the view.
+  /// This is faster but equivalent to constructing a new view with the same
+  /// arguments and assigning it.
+  /// @return @c this.
+  self &setView(const void *start, ///< First valid character.
+                const void *end    ///< First invalid character.
+                );
+
+  /// Clear the view (become an empty view).
+  self &clear();
+
+  /// @return @c true if the byte at @a *p is in the view.
+  bool contains(const void *p) const;
+
+  /** Find a value.
+      The memory is searched as if it were an array of the value type @a V.
+
+      @return A pointer to the first occurrence of @a v in @a this
+      or @c nullptr if @a v is not found.
+  */
+  template <typename V> const V *find(V v) const;
+
+  /** Find a value.
+      The memory is searched as if it were an array of the value type @a V.
+
+      @return A pointer to the first value for which @a pred is @c true otherwise
+      @c nullptr.
+  */
+  template <typename V> const V *find(std::function<bool(V)> const &pred);
+
+  /** Get the initial segment of the view before @a p.
+
+      The byte at @a p is not included. If @a p is not in the view an empty view
+      is returned.
+
+      @return A buffer that contains all data before @a p.
+  */
+  self prefix(const void *p) const;
+
+  /** Split the view at @a p.
+
+      The view is split in to two parts at @a p and the prefix is returned. The view is updated to
+      contain the bytes not returned in the prefix. The prefix will not contain @a p.
+
+      @note If @a *p refers to a byte that is not in @a this then @a this is not changed and an empty
+      buffer is returned. Therefore this method can be safely called with the return value of
+      calling @c find.
+
+      @return A buffer containing data up to but not including @a p.
+
+      @see extractPrefix
+  */
+  self splitPrefix(const void *p);
+
+  /** Extract a prefix delimited by @a p.
+
+      A prefix of @a this is removed from the view and returned. If @a p is not in the view then the
+      entire view is extracted and returned.
+
+      If @a p points at a byte in the view this is identical to @c splitPrefix. If not then the
+      entire view in @a this will be returned and @a this will become an empty view.
+
+      @return The prefix bounded at @a p or the entire view if @a p is not a byte in the view.
+
+      @see splitPrefix
+  */
+  self extractPrefix(const void *p);
+
+  /** Get the trailing segment of the view after @a p.
+
+      The byte at @a p is not included. If @a p is not in the view an empty view is returned.
+
+      @return A buffer that contains all data after @a p.
+  */
+  self suffix(const void *p) const;
+
+  /** Split the view at @a p.
+
+      The view is split in to two parts and the suffix is returned. The view is updated to contain
+      the bytes not returned in the suffix. The suffix will not contain @a p.
+
+      @note If @a p does not refer to a byte in the view, an empty view is returned and @a this is
+      unchanged.
+
+      @return The suffix, as a new view.
+  */
+  self splitSuffix(const void *p);
+};
+
+/** A read only view of contiguous piece of memory.
+
+    A @c StringView does not own the memory to which it refers, it is simply a view of part of some
+    (presumably) larger memory object. The purpose is to allow working in a read only way a specific
+    part of the memory. A classic example for ATS is working with HTTP header fields and values
+    which need to be accessed independently but preferably without copying. A @c StringView supports this style.
+
+    MemView is based on an earlier class ConstBuffer and influenced by Boost.string_ref. 
Neither + of these were adequate for how use of @c ConstBuffer evolved and so @c MemView is @c + ConstBuffer with some additional stylistic changes based on Boost.string_ref. + + In particular @c MemView is designed both to support passing via API (to replace the need to + pass two parameters for one real argument) and to aid in parsing input without copying. + + */ +class StringView +{ + typedef StringView self; ///< Self reference type. + +protected: + const char *_ptr = nullptr; ///< Pointer to base of memory chunk. + size_t _size = 0; ///< Size of memory chunk. + +public: + /// Default constructor (empty buffer). + constexpr StringView(); + + /** Construct explicitly with a pointer and size. + */ + constexpr StringView(const char *ptr, ///< Pointer to buffer. + size_t n ///< Size of buffer. + ); + + /** Construct from a half open range of two pointers. + @note The byte at @start is in the view but the byte at @a end is not. + */ + constexpr StringView(const char *start, ///< First byte in the view. + const char *end ///< First byte not in the view. + ); + + /** Construct from nullptr. + This implicitly makes the length 0. + */ + constexpr StringView(std::nullptr_t); + + /** Construct from null terminated string. + @note The terminating null is not included. @c strlen is used to determine the length. + */ + explicit StringView(const char *s); + + /// Construct from @c MemView to reference the same view. + constexpr StringView(MemView const &that); + + /// Construct from @c std::string, referencing the entire string contents. + StringView(std::string const &str); + + /** Equality. + + This is effectively a pointer comparison, buffer contents are not compared. + + @return @c true if @a that refers to the same view as @a this, + @c false otherwise. + */ + bool operator==(self const &that) const; + + /** Inequality. + @return @c true if @a that does not refer to the same view as @a this, + @c false otherwise. 
+ */ + bool operator!=(self const &that) const; + + /// Assignment - the view is copied, not the content. + self &operator=(self const &that); + + /// @return The first byte in the view. + char operator*() const; + + /// @return the byte at offset @a n. + char operator[](size_t n) const; + + /// @return the byte at offset @a n. + char operator[](int n) const; + + /** Shift the view to discard the first byte. + @return @a this. + */ + self &operator++(); + + /** Shift the view to discard the leading @a n bytes. + @return @a this + */ + self &operator+=(size_t n); + + /// Check for empty view. + /// @return @c true if the view has a zero pointer @b or size. + bool operator!() const; + + /// Check for non-empty view. + /// @return @c true if the view refers to a non-empty range of bytes. + explicit operator bool() const; + + /// Check for empty view (no content). + /// @see operator bool + bool is_empty() const; + + /// @name Accessors. + //@{ + /// Pointer to the first byte in the view. + const char *begin() const; + /// Pointer to first byte not in the view. + const char *end() const; + /// Number of bytes in the view. + constexpr size_t size() const; + /// Memory pointer. + /// @note This is equivalent to @c begin currently but it's probably good to have separation. + constexpr const char *ptr() const; + //@} + + /// Set the view. + /// This is faster but equivalent to constructing a new view with the same + /// arguments and assigning it. + /// @return @c this. + self &setView(const char *ptr, ///< Buffer address. + size_t n = 0 ///< Buffer size. + ); + + /// Set the view. + /// This is faster but equivalent to constructing a new view with the same + /// arguments and assigning it. + /// @return @c this. + self &setView(const char *start, ///< First valid character. + const char *end ///< First invalid character. + ); + + /// Clear the view (become an empty view). + self &clear(); + + /// @return @c true if the byte at @a *p is in the view. 
+ bool contains(const char *p) const; + + /** Find a byte. + @return A pointer to the first occurrence of @a c in @a this + or @c nullptr if @a c is not found. + */ + const char *find(char c) const; + + /** Find a byte. + @return A pointer to the first occurence of any of @a delimiters in @a + this or @c nullptr if not found. + */ + const char *find(self delimiters) const; + + /** Find a byte. + @return A pointer to the first byte for which @a pred is @c true otherwise + @c nullptr. + */ + const char *find(std::function const &pred) const; + + /** Remove bytes that match @a c from the start of the view. + */ + self <rim(char c); + /** Remove bytes from the start of the view that are in @a delimiters. + */ + self <rim(self delimiters); + /** Remove bytes from the start of the view for which @a pred is @c true. + */ + self <rim(std::function const &pred); + + /** Remove bytes that match @a c from the end of the view. + */ + self &rtrim(char c); + /** Remove bytes from the end of the view that are in @a delimiters. + */ + self &rtrim(self delimiters); + /** Remove bytes from the start and end of the view for which @a pred is @c true. + */ + self &rtrim(std::function const &pred); + + /** Remove bytes that match @a c from the end of the view. + */ + self &trim(char c); + /** Remove bytes from the start and end of the view that are in @a delimiters. + */ + self &trim(self delimiters); + /** Remove bytes from the start and end of the view for which @a pred is @c true. + */ + self &trim(std::function const &pred); + + /** Get the initial segment of the view before @a p. + + The byte at @a p is not included. If @a p is not in the view an empty view + is returned. + + @return A buffer that contains all data before @a p. + */ + self prefix(const char *p) const; + + /// Convenience overload for character. + self prefix(char c); + /// Convenience overload, split on delimiter set. + self prefix(self delimiters) const; + /// Convenience overload, split on predicate. 
+ self prefix(std::function const &pred) const; + + /** Split the view on the character at @a p. + + The view is split in to two parts and the byte at @a p is discarded. @a this retains all data + @b after @a p (equivalent to MemView(p+1, this->end()). A new view containing the + initial bytes up to but not including @a p is returned, (equivalent to + MemView(this->begin(), p)). + + This is convenient when tokenizing and @a p points at a delimiter. + + @note If @a *p refers toa byte that is not in @a this then @a this is not changed and an empty + buffer is returned. Therefore this method can be safely called with the return value of + calling @c find. + + @code + void f(MemView& text) { + MemView token = text.splitPrefix(text.find(delimiter)); + if (token) { // ... process token } + @endcode + + @return A buffer containing data up to but not including @a p. + + @see extractPrefix + */ + self splitPrefix(const char *p); + + /// Convenience overload, split on character. + self splitPrefix(char c); + /// Convenience overload, split on delimiter set. + self splitPrefix(self delimiters); + /// Convenience overload, split on predicate. + self splitPrefix(std::function const &pred); + + /** Extract a prefix delimited by @a p. + + A prefix of @a this is removed from the view and returned. If @a p is not in the view then the + entire view is extracted and returned. + + If @a p points at a byte in the view this is identical to @c splitPrefix. If not then the + entire view in @a this will be returned and @a this will become an empty view. This is easier + to use when repeated extracting tokens. The source view will become empty after extracting the + last token. + + @code + MemView text; + while (text) { + MemView token = text.extractPrefix(text.find(delimiter)); + // .. process token which will always be non-empty because text was not empty. + } + @endcode + + @return The prefix bounded at @a p or the entire view if @a p is not a byte in the view. 
+ + @see splitPrefix + */ + self extractPrefix(const char *p); + + /// Convenience overload, extract on delimiter set. + self extractPrefix(char c); + /// Convenience overload, extract on delimiter set. + self extractPrefix(self delimiters); + /// Convenience overload, extract on predicate. + self extractPrefix(std::function const &pred); + + /** Get the trailing segment of the view after @a p. + + The byte at @a p is not included. If @a p is not in the view an empty view is returned. + + @return A buffer that contains all data after @a p. + */ + self suffix(const char *p) const; + + /// Convenience overload for character. + self suffix(char c); + /// Convenience overload for delimiter set. + self suffix(self delimiters); + /// Convenience overload for predicate. + self suffix(std::function const &pred); + + /** Split the view on the character at @a p. + + The view is split in to two parts and the byte at @a p is discarded. @a this retains all data + @b before @a p (equivalent to MemView(this->begin(), p)). A new view containing + the trailing bytes after @a p is returned, (equivalent to MemView(p+1, + this->end())). + + @note If @a p does not refer to a byte in the view, an empty view is returned and @a this is + unchanged. + + @return @a this. + */ + self splitSuffix(const char *p); + + /// Convenience overload for character. + self splitSuffix(char c); + /// Convenience overload for delimiter set. + self splitSuffix(self delimiters); + /// Convenience overload for predicate. + self splitSuffix(std::function const &pred); + + // Functors for using this class in STL containers. + /// Ordering functor, lexicographic comparison. + struct LessThan { + bool + operator()(MemView const &lhs, MemView const &rhs) + { + return -1 == strcmp(lhs, rhs); + } + }; + /// Ordering functor, case ignoring lexicographic comparison. 
+  struct LessThanNoCase {
+    /// @return @c true if @a lhs orders before @a rhs when case is ignored.
+    bool
+    operator()(MemView const &lhs, MemView const &rhs) const
+    {
+      // Any negative result means "less"; do not rely on the value being exactly -1.
+      return strcasecmp(lhs, rhs) < 0;
+    }
+  };
+
+  /// Specialized stream operator implementation.
+  /// @note Use the standard stream operator unless there is a specific need for this, which is unlikely.
+  /// @return The stream @a os.
+  /// @internal Needed because @c std::ostream::write must be used and
+  /// so alignment / fill have to be explicitly handled.
+  template <typename Stream> Stream &stream_write(Stream &os, const StringView &b) const;
+
+protected:
+  /// Initialize a bit mask to mark which characters are in this view.
+  void initDelimiterSet(std::bitset<256> &set);
+};
+// ----------------------------------------------------------
+// Inline implementations.
+
+/// Default construct; no member is set explicitly here.
+inline constexpr MemView::MemView()
+{
+}
+/// Construct a view of @a n bytes starting at @a ptr.
+inline constexpr MemView::MemView(void const *ptr, size_t n) : _ptr(ptr), _size(n)
+{
+}
+/// Construct from the half open range [@a start, @a end); the size is the byte distance between them.
+inline constexpr MemView::MemView(void const *start, void const *end)
+  : _ptr(start), _size(static_cast<const char *>(end) - static_cast<const char *>(start))
+{
+}
+/// Construct an empty view from @c nullptr.
+inline constexpr MemView::MemView(std::nullptr_t) : _ptr(nullptr), _size(0)
+{
+}
+/// Construct a view of the same memory as the string view @a that.
+inline constexpr MemView::MemView(StringView const &that) : _ptr(that.ptr()), _size(that.size())
+{
+}
+
+/// Point the view at @a n bytes starting at @a ptr.
+/// @return @a this.
+inline MemView &
+MemView::setView(const void *ptr, size_t n)
+{
+  _ptr  = ptr;
+  _size = n;
+  return *this;
+}
+
+/// Point the view at the half open range [@a ptr, @a limit).
+/// @return @a this.
+inline MemView &
+MemView::setView(const void *ptr, const void *limit)
+{
+  _ptr  = ptr;
+  _size = static_cast<const char *>(limit) - static_cast<const char *>(ptr);
+  return *this;
+}
+
+/// Make the view empty (null pointer, zero size).
+/// @return @a this.
+inline MemView &
+MemView::clear()
+{
+  _ptr  = nullptr;
+  _size = 0;
+  return *this;
+}
+
+/// Identity comparison: equal only if both views have the same start and the same size.
+/// The referenced bytes are not compared.
+inline bool
+MemView::operator==(self const &that) const
+{
+  return _size == that._size && _ptr == that._ptr;
+}
+
+/// Negation of @c operator==.
+inline bool
+MemView::operator!=(self const &that) const
+{
+  return !(*this == that);
+}
+
+/// @return @c true if the view has a null pointer or zero size.
+inline bool MemView::operator!() const
+{
+  return !(_ptr && _size);
+}
+
+/// @return @c true if the view has a non-null pointer and a non-zero size.
+inline MemView::operator bool() const
+{
+  return _ptr && _size;
+}
+
+/// @return @c true if the view has a null pointer or zero size.
+inline bool
+MemView::is_empty() const
+{
+  return !(_ptr && _size);
+}
+
+/// Drop the first byte of the view.
+/// @note No check is made here that the view is non-empty.
+inline MemView &MemView::operator++()
+{
+  _ptr = static_cast<const char *>(_ptr) + 1;
+  --_size;
+  return *this;
+}
+
+/// Drop the first @a n bytes of the view. If @a n is larger than the view, the view is cleared.
+/// @return @a this.
+inline MemView &
+MemView::operator+=(size_t n)
+{
+  if (n > _size) {
+    _ptr  = nullptr;
+    _size = 0;
+  } else {
+    _ptr = static_cast<const char *>(_ptr) + n;
+    _size -= n;
+  }
+  return *this;
+}
+
+/// @return Pointer to the first byte of the view.
+inline const void *
+MemView::begin() const
+{
+  return _ptr;
+}
+/// @return Pointer to the first byte of the view.
+inline constexpr const void *
+MemView::ptr() const
+{
+  return _ptr;
+}
+
+/// @return Pointer one past the last byte of the view.
+inline const void *
+MemView::end() const
+{
+  return static_cast<const char *>(_ptr) + _size;
+}
+
+/// @return The number of bytes in the view.
+inline constexpr size_t
+MemView::size() const
+{
+  return _size;
+}
+
+/// Copy the pointer and size from @a that; the memory itself is not copied.
+inline MemView &
+MemView::operator=(MemView const &that)
+{
+  _ptr  = that._ptr;
+  _size = that._size;
+  return *this;
+}
+
+/// @return @c true if @a p points at a byte inside the view, i.e. in [begin(), end()).
+inline bool
+MemView::contains(const void *p) const
+{
+  // Both bounds must be tested against @a p; a null @a p fails the lower bound of a non-null view.
+  return _ptr <= p && p < this->end();
+}
+
+/// @return The bytes of the view before @a p, or an empty view if @a p is not in the view.
+inline MemView
+MemView::prefix(const void *p) const
+{
+  self zret;
+  if (this->contains(p))
+    zret.setView(_ptr, p);
+  return zret;
+}
+
+inline MemView
+MemView::splitPrefix(const void *p)
+{
+  self zret; // default to empty return.
+  if (this->contains(p)) {
+    zret.setView(_ptr, p);
+    this->setView(p, this->end());
+  }
+  return zret;
+}
+
+/// Remove and return the bytes before @a p.
+/// If the split yields an empty view (which includes @a p not being in the view) the whole view is
+/// returned instead and @a this is cleared.
+inline MemView
+MemView::extractPrefix(const void *p)
+{
+  self zret{this->splitPrefix(p)};
+
+  // For extraction if zret is empty, use up all of @a this
+  if (!zret) {
+    zret = *this;
+    this->clear();
+  }
+
+  return zret;
+}
+
+/// @return The view from @a p to the end, or an empty view if @a p is not in the view.
+inline MemView
+MemView::suffix(const void *p) const
+{
+  self zret;
+  if (this->contains(p))
+    zret.setView(p, this->end());
+  return zret;
+}
+
+/// Split at @a p: @a this keeps the bytes before @a p, the bytes from @a p on are returned.
+/// If @a p is not in the view, @a this is unchanged and an empty view is returned.
+inline MemView
+MemView::splitSuffix(const void *p)
+{
+  self zret;
+  if (this->contains(p)) {
+    zret.setView(p, this->end());
+    this->setView(_ptr, p);
+  }
+  return zret;
+}
+
+/// Treat the view as an array of @a V and return element @a n.
+/// @note No bounds check is done.
+template <typename V>
+inline V
+MemView::array(size_t n) const
+{
+  return static_cast<const V *>(_ptr)[n];
+}
+
+/// Search the view, treated as an array of @a V, for the first element equal to @a v.
+/// Trailing bytes that do not make up a whole @a V are not examined.
+/// @return A pointer to the element, or @c nullptr if there is none.
+template <typename V>
+inline const V *
+MemView::find(V v) const
+{
+  // @a limit needs its own '*', otherwise it is declared as a V value rather than a pointer.
+  for (const V *spot = static_cast<const V *>(_ptr), *limit = spot + (_size / sizeof(V)); spot < limit; ++spot)
+    if (v == *spot)
+      return spot;
+  return nullptr;
+}
+
+// Specialize char for performance.
+template <>
+inline const char *
+MemView::find(char v) const
+{
+  // Do not hand a null pointer to memchr; an empty view may hold one.
+  return _ptr ? static_cast<const char *>(memchr(_ptr, v, _size)) : nullptr;
+}
+
+/// Search the view, treated as an array of @a V, for the first element accepted by @a pred.
+/// @return A pointer to the element, or @c nullptr if there is none.
+template <typename V>
+inline const V *
+MemView::find(std::function<bool(V)> const &pred)
+{
+  for (const V *p = static_cast<const V *>(_ptr), *limit = p + (_size / sizeof(V)); p < limit; ++p)
+    if (pred(*p))
+      return p;
+  return nullptr;
+}
+
+// === StringView Implementation ===
+/// Default construct; no member is set explicitly here.
+inline constexpr StringView::StringView()
+{
+}
+/// Construct a view of @a n characters starting at @a ptr.
+inline constexpr StringView::StringView(const char *ptr, size_t n) : _ptr(ptr), _size(n)
+{
+}
+/// Construct from the half open range [@a start, @a end).
+inline constexpr StringView::StringView(const char *start, const char *end) : _ptr(start), _size(end - start)
+{
+}
+/// Construct from a C string; the terminating nul is not part of the view.
+/// A null @a s yields an empty view instead of calling @c strlen on it.
+inline StringView::StringView(const char *s) : _ptr(s), _size(s ? strlen(s) : 0)
+{
+}
+/// Construct an empty view from @c nullptr.
+inline constexpr StringView::StringView(std::nullptr_t) : _ptr(nullptr), _size(0)
+{
+}
+/// Construct a view of the same memory as @a that, read as characters.
+inline constexpr StringView::StringView(MemView const &that) : _ptr(static_cast<const char *>(that.ptr())), _size(that.size())
+{
+}
+/// Construct a view of the contents of @a str. Nothing is copied.
+inline StringView::StringView(std::string const &str) : _ptr(str.data()), _size(str.size())
+{
+}
+
+/// Clear @a set, then set the bit for every character that occurs in this view.
+inline void StringView::initDelimiterSet(std::bitset<256> &set)
+{
+  set.reset();
+  for (char c : *this)
+    set[static_cast<unsigned char>(c)] = true; // index by unsigned value, char may be signed.
+}
+
+/// Point the view at @a n characters starting at @a ptr.
+/// @return @a this.
+inline StringView &
+StringView::setView(const char *ptr, size_t n)
+{
+  _ptr  = ptr;
+  _size = n;
+  return *this;
+}
+
+/// Point the view at the half open range [@a ptr, @a limit).
+/// @return @a this.
+inline StringView &
+StringView::setView(const char *ptr, const char *limit)
+{
+  _ptr  = ptr;
+  _size = limit - ptr;
+  return *this;
+}
+
+/// Make the view empty (null pointer, zero size).
+/// @return @a this.
+inline StringView &
+StringView::clear()
+{
+  _ptr  = nullptr;
+  _size = 0;
+  return *this;
+}
+
+/// Identity comparison: equal only if both views have the same start and the same size.
+/// The characters are not compared.
+inline bool
+StringView::operator==(self const &that) const
+{
+  return _size == that._size && _ptr == that._ptr;
+}
+
+/// Negation of @c operator==.
+inline bool
+StringView::operator!=(self const &that) const
+{
+  return !(*this == that);
+}
+
+/// @return @c true if the view has a null pointer or zero size.
+inline bool StringView::operator!() const
+{
+  return !(_ptr && _size);
+}
+
+/// @return @c true if the view has a non-null pointer and a non-zero size.
+inline StringView::operator bool() const
+{
+  return _ptr && _size;
+}
+
+/// @return @c true if the view has a null pointer or zero size.
+inline bool
+StringView::is_empty() const
+{
+  return !(_ptr && _size);
+}
+
+inline char StringView::operator*() const
+{
+  return *_ptr;
+}
+
+/// Drop the first character of the view.
+/// @note No check is made here that the view is non-empty.
+inline StringView &StringView::operator++()
+{
+  ++_ptr;
+  --_size;
+  return *this;
+}
+
+/// Drop the first @a n characters of the view. If @a n is larger than the view, the view is cleared.
+/// @return @a this.
+inline StringView &
+StringView::operator+=(size_t n)
+{
+  if (n > _size) {
+    _ptr  = nullptr;
+    _size = 0;
+  } else {
+    _ptr += n;
+    _size -= n;
+  }
+  return *this;
+}
+
+/// @return Pointer to the first character of the view.
+inline const char *
+StringView::begin() const
+{
+  return _ptr;
+}
+/// @return Pointer to the first character of the view.
+inline constexpr const char *
+StringView::ptr() const
+{
+  return _ptr;
+}
+
+/// @return Pointer one past the last character of the view.
+inline const char *
+StringView::end() const
+{
+  return _ptr + _size;
+}
+
+/// @return The number of characters in the view.
+inline constexpr size_t
+StringView::size() const
+{
+  return _size;
+}
+
+/// Copy the pointer and size from @a that; the characters themselves are not copied.
+inline StringView &
+StringView::operator=(StringView const &that)
+{
+  _ptr  = that._ptr;
+  _size = that._size;
+  return *this;
+}
+
+/// @return The character at offset @a n. No bounds check is done.
+inline char StringView::operator[](size_t n) const
+{
+  return _ptr[n];
+}
+
+/// @return The character at offset @a n. No bounds check is done.
+inline char StringView::operator[](int n) const
+{
+  return _ptr[n];
+}
+
+/// @return @c true if @a p points at a character inside the view, i.e. in [begin(), end()).
+inline bool
+StringView::contains(const char *p) const
+{
+  return _ptr <= p && p < _ptr + _size;
+}
+
+/// @return The characters before @a p, or an empty view if @a p is not in the view.
+inline auto
+StringView::prefix(const char *p) const -> self
+{
+  self zret;
+  if (this->contains(p))
+    zret.setView(_ptr, p);
+  return zret;
+}
+
+/// Prefix up to the first occurrence of @a c; empty if @a c does not occur.
+inline auto
+StringView::prefix(char c) -> self
+{
+  return this->prefix(this->find(c));
+}
+
+/// Prefix up to the first character that is in @a delimiters; empty if there is none.
+inline auto
+StringView::prefix(self delimiters) const -> self
+{
+  return this->prefix(this->find(delimiters));
+}
+
+/// Prefix up to the first character accepted by @a pred; empty if there is none.
+inline auto
+StringView::prefix(std::function<bool(char)> const &pred) const -> self
+{
+  return this->prefix(this->find(pred));
+}
+
+inline auto
+StringView::splitPrefix(const char *p) -> self
+{
+  self zret; // default to empty return.
+  if (this->contains(p)) {
+    zret.setView(_ptr, p);
+    this->setView(p + 1, this->end());
+  }
+  return zret;
+}
+
+/// Split on the first occurrence of @a c.
+inline auto
+StringView::splitPrefix(char c) -> self
+{
+  return this->splitPrefix(this->find(c));
+}
+
+/// Split on the first character that is in @a delimiters.
+inline auto
+StringView::splitPrefix(self delimiters) -> self
+{
+  return this->splitPrefix(this->find(delimiters));
+}
+
+/// Split on the first character accepted by @a pred.
+inline auto
+StringView::splitPrefix(std::function<bool(char)> const &pred) -> self
+{
+  return this->splitPrefix(this->find(pred));
+}
+
+/// Remove and return the characters before @a p, discarding the character at @a p.
+/// If @a p is not in the view the entire view is returned and @a this becomes empty.
+inline StringView
+StringView::extractPrefix(const char *p)
+{
+  // Decide on @a p itself, not on whether the split result is empty: a delimiter at the very
+  // first character gives an empty prefix, and must not make the rest of the view the token.
+  if (this->contains(p))
+    return this->splitPrefix(p);
+
+  // @a p is not in the view, use up all of @a this.
+  self zret{*this};
+  this->clear();
+  return zret;
+}
+
+/// Extract on the first occurrence of @a c.
+inline auto
+StringView::extractPrefix(char c) -> self
+{
+  return this->extractPrefix(this->find(c));
+}
+
+/// Extract on the first character that is in @a delimiters.
+inline auto
+StringView::extractPrefix(self delimiters) -> self
+{
+  return this->extractPrefix(this->find(delimiters));
+}
+
+/// Extract on the first character accepted by @a pred.
+inline auto
+StringView::extractPrefix(std::function<bool(char)> const &pred) -> self
+{
+  return this->extractPrefix(this->find(pred));
+}
+
+/// @return The characters after @a p (not including it), or an empty view if @a p is not in the view.
+inline StringView
+StringView::suffix(const char *p) const
+{
+  self zret;
+  if (this->contains(p))
+    zret.setView(p + 1, _ptr + _size);
+  return zret;
+}
+
+/// Suffix after the first occurrence of @a c.
+inline auto
+StringView::suffix(char c) -> self
+{
+  return this->suffix(this->find(c));
+}
+
+/// Suffix after the first character that is in @a delimiters.
+inline auto
+StringView::suffix(self delimiters) -> self
+{
+  return this->suffix(this->find(delimiters));
+}
+
+/// Suffix after the first character accepted by @a pred.
+inline auto
+StringView::suffix(std::function<bool(char)> const &pred) -> self
+{
+  return this->suffix(this->find(pred));
+}
+
+/// Split at @a p: @a this keeps the characters before @a p, the characters after @a p are
+/// returned and the character at @a p is dropped. If @a p is not in the view, @a this is
+/// unchanged and an empty view is returned.
+inline StringView
+StringView::splitSuffix(const char *p)
+{
+  self zret;
+  if (this->contains(p)) {
+    zret.setView(p + 1, this->end());
+    this->setView(_ptr, p);
+  }
+  return zret;
+}
+
+/// Split the suffix on the first occurrence of @a c.
+inline auto
+StringView::splitSuffix(char c) -> self
+{
+  return this->splitSuffix(this->find(c));
+}
+
+/// Split the suffix on the first character that is in @a delimiters.
+inline auto
+StringView::splitSuffix(self delimiters) -> self
+{
+  return this->splitSuffix(this->find(delimiters));
+}
+
+inline auto
+StringView::splitSuffix(std::function<bool(char)> const &pred) -> self
+{
+  return this->splitSuffix(this->find(pred));
+}
+
+/// @return A pointer to the first occurrence of @a c in the view, or @c nullptr if there is none.
+inline const char *
+StringView::find(char c) const
+{
+  // Do not hand a null pointer to memchr; an empty view may hold one.
+  return _ptr ? static_cast<const char *>(memchr(_ptr, c, _size)) : nullptr;
+}
+
+/// @return A pointer to the first character of the view that also occurs in @a delimiters, or
+/// @c nullptr if there is none.
+inline const char *
+StringView::find(self delimiters) const
+{
+  std::bitset<256> valid;
+  delimiters.initDelimiterSet(valid);
+
+  for (const char *p = this->begin(), *limit = this->end(); p < limit; ++p)
+    if (valid[static_cast<unsigned char>(*p)])
+      return p;
+
+  return nullptr;
+}
+
+/// @return A pointer to the first character accepted by @a pred, or @c nullptr if there is none.
+inline const char *
+StringView::find(std::function<bool(char)> const &pred) const
+{
+  const char *p = std::find_if(this->begin(), this->end(), pred);
+  return p == this->end() ? nullptr : p;
+}
+
+/// Remove leading characters equal to @a c.
+/// @return @a this.
+inline StringView &
+StringView::ltrim(char c)
+{
+  while (_size && *_ptr == c)
+    ++*this;
+  return *this;
+}
+
+/// Remove trailing characters equal to @a c.
+/// @return @a this.
+inline StringView &
+StringView::rtrim(char c)
+{
+  while (_size && _ptr[_size - 1] == c)
+    --_size;
+  return *this;
+}
+/// Remove leading and trailing characters equal to @a c.
+/// @return @a this.
+inline StringView &
+StringView::trim(char c)
+{
+  this->ltrim(c);
+  return this->rtrim(c);
+}
+
+/// Remove leading characters that occur in @a delimiters.
+/// @return @a this.
+inline StringView &
+StringView::ltrim(self delimiters)
+{
+  std::bitset<256> valid;
+  delimiters.initDelimiterSet(valid);
+
+  while (_size && valid[static_cast<unsigned char>(*_ptr)])
+    ++*this;
+
+  return *this;
+}
+
+/// Remove trailing characters that occur in @a delimiters.
+/// @return @a this.
+inline StringView &
+StringView::rtrim(self delimiters)
+{
+  std::bitset<256> valid;
+  delimiters.initDelimiterSet(valid);
+
+  while (_size && valid[static_cast<unsigned char>(_ptr[_size - 1])])
+    --_size;
+
+  return *this;
+}
+
+/// Remove leading and trailing characters that occur in @a delimiters.
+/// @return @a this.
+inline StringView &
+StringView::trim(self delimiters)
+{
+  std::bitset<256> valid;
+  delimiters.initDelimiterSet(valid);
+  // Do this explicitly, so we don't have to initialize the character set twice.
+  while (_size && valid[static_cast<unsigned char>(_ptr[_size - 1])])
+    --_size;
+  while (_size && valid[static_cast<unsigned char>(_ptr[0])])
+    ++*this;
+  return *this;
+}
+
+/// Remove leading characters accepted by @a pred.
+/// @return @a this.
+inline StringView &
+StringView::ltrim(std::function<bool(char)> const &pred)
+{
+  while (_size && pred(_ptr[0]))
+    ++*this;
+  return *this;
+}
+
+/// Remove trailing characters accepted by @a pred.
+/// @return @a this.
+inline StringView &
+StringView::rtrim(std::function<bool(char)> const &pred)
+{
+  while (_size && pred(_ptr[_size - 1]))
+    --_size;
+  return *this;
+}
+
+/// Remove leading and trailing characters accepted by @a pred.
+/// @return @a this.
+inline StringView &
+StringView::trim(std::function<bool(char)> const &pred)
+{
+  this->ltrim(pred);
+  return this->rtrim(pred);
+}
+
+/// Compare two string views; simply forwards to the view @c memcmp in this namespace.
+inline int
+strcmp(StringView const &lhs, StringView const &rhs)
+{
+  return ApacheTrafficServer::memcmp(lhs, rhs);
+}
+
+namespace detail
+{
+  /// Write padding to the stream, using the current stream fill character.
+  /// @a n fill characters are written, in pieces of at most @c pad_size, stopping early if the
+  /// stream stops being good.
+  template <typename Stream>
+  void
+  stream_fill(Stream &os, std::size_t n)
+  {
+    static constexpr size_t pad_size = 8;
+    typename Stream::char_type padding[pad_size];
+
+    std::fill_n(padding, pad_size, os.fill());
+    for (; n >= pad_size && os.good(); n -= pad_size)
+      os.write(padding, pad_size);
+    if (n > 0 && os.good())
+      os.write(padding, n);
+  }
+
+  extern template void stream_fill(std::ostream &, std::size_t);
+} // detail
+
+/// Write @a b to @a os, padded with the fill character up to the stream width.
+/// Padding goes after the text if the stream is left adjusted, otherwise before it.
+/// @note The stream width is read here but not reset.
+/// @return @a os.
+template <typename Stream>
+Stream &
+StringView::stream_write(Stream &os, const StringView &b) const
+{
+  const std::size_t w = os.width();
+  if (w <= b.size()) {
+    // Text fills the field, no padding needed.
+    os.write(b.ptr(), b.size());
+  } else {
+    const std::size_t pad_size = w - b.size();
+    const bool align_left      = (os.flags() & Stream::adjustfield) == Stream::left;
+    if (!align_left && os.good())
+      detail::stream_fill(os, pad_size);
+    if (os.good())
+      os.write(b.ptr(), b.size());
+    if (align_left && os.good())
+      detail::stream_fill(os, pad_size);
+  }
+  return os;
+}
+
+// Provide an instantiation for @c std::ostream as it's likely this is the only one ever used.
+// Explicit instantiation declaration: this header does not itself instantiate the std::ostream version.
+extern template std::ostream &StringView::stream_write(std::ostream &, const StringView &) const;
+
+} // end namespace ApacheTrafficServer
+
+// NOTE(review): these overloads are declared inside namespace std, which the C++ standard does
+// not sanction for user code -- confirm this is intentional.
+namespace std
+{
+/// Stream insertion for a memory view. Only declared in this header.
+ostream &operator<<(ostream &os, const ApacheTrafficServer::MemView &b);
+/// Stream insertion for a string view. Only declared in this header.
+ostream &operator<<(ostream &os, const ApacheTrafficServer::StringView &b);
+}
+
+#endif // TS_BUFFER_HEADER
diff --git a/lib/ts/test_MemView.cc b/lib/ts/test_MemView.cc
new file mode 100644
index 00000000000..1ef9549a25d
--- /dev/null
+++ b/lib/ts/test_MemView.cc
@@ -0,0 +1,52 @@
+/** @file
+
+    MemView testing.
+
+    @section license License
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ */
+
+// NOTE(review): the header names were missing from this copy of the patch and have been
+// reconstructed from what the test uses -- confirm against the original commit.
+#include <ts/MemView.h>
+#include <string>
+#include <iostream>
+#include <iomanip>
+#include <ios>
+
+using namespace ApacheTrafficServer;
+
+/// Print a StringView to std::cout with several width / alignment / fill settings.
+/// Nothing is checked, the output is for visual inspection.
+/// @return Always @c true.
+bool
+Test_1()
+{
+  std::string text = "01234567";
+  StringView a(text);
+
+  // Unformatted, then a width narrower than the text, then wider with default adjustment.
+  std::cout << "Text = |" << a << '|' << std::endl;
+  std::cout << " = |" << std::setw(5) << a << '|' << std::endl;
+  std::cout << " = |" << std::setw(12) << a << '|' << std::endl;
+  // Explicit right / left adjustment.
+  std::cout << " = |" << std::setw(12) << std::right << a << '|' << std::endl;
+  std::cout << " = |" << std::setw(12) << std::left << a << '|' << std::endl;
+  // Same again with a visible fill character.
+  std::cout << " = |" << std::setw(12) << std::right << std::setfill('_') << a << '|' << std::endl;
+  std::cout << " = |" << std::setw(12) << std::left << std::setfill('_') << a << '|' << std::endl;
+  return true;
+}
+
+/// Run the test; the exit status is 0 if it reports success, 1 otherwise.
+int
+main(int, char *[])
+{
+  return Test_1() ? 0 : 1;
+}