From 2b7d66e408c67a42b43429b941f27a91e090b9b4 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Tue, 23 Jan 2024 20:44:56 +0100 Subject: [PATCH 01/10] Add test checking for boost::locale::collator Reproducer for crash due to type confusin as reported in https://github.com/boostorg/locale/issues/210#issuecomment-1876819426 --- src/boost/locale/shared/ids.cpp | 2 +- test/test_generator.cpp | 45 +++++++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/boost/locale/shared/ids.cpp b/src/boost/locale/shared/ids.cpp index 95a64ff1..e295f214 100644 --- a/src/boost/locale/shared/ids.cpp +++ b/src/boost/locale/shared/ids.cpp @@ -1,11 +1,11 @@ // // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) +// Copyright (c) 2024 Alexander Grund // // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt #include -#include #include #include #include diff --git a/test/test_generator.cpp b/test/test_generator.cpp index f138061d..0de07c97 100644 --- a/test/test_generator.cpp +++ b/test/test_generator.cpp @@ -37,6 +37,21 @@ namespace boost { namespace locale { namespace test { { return std::has_facet(l) && is_facet(&std::use_facet(l)); } + + template + bool has_not_facet(const std::locale& l) + { + const Facet* f; + try { + f = &std::use_facet(l); + } catch(const std::bad_cast&) { + return !std::has_facet(l); + } + // This mustn't be reached, checks for debugging + TEST(is_facet(f)); // LCOV_EXCL_LINE + TEST(!std::has_facet(l)); // LCOV_EXCL_LINE + return false; // LCOV_EXCL_LINE + } }}} // namespace boost::locale::test namespace blt = boost::locale::test; @@ -266,25 +281,30 @@ void test_main(int /*argc*/, char** /*argv*/) std::cout << "-- Locale: " << localeName << std::endl; const std::locale l = g(localeName); #ifdef __cpp_char8_t -# define TEST_HAS_FACET_CHAR8(facet, l) TEST(blt::has_facet>(l)) +# define TEST_FOR_CHAR8(check) TEST(check) #else -# define TEST_HAS_FACET_CHAR8(facet, l) (void)0 +# define TEST_FOR_CHAR8(check) (void)0 #endif #ifndef BOOST_LOCALE_NO_CXX20_STRING8 -# define TEST_HAS_FACET_STRING8(facet, l) TEST(blt::has_facet>(l)) +# define TEST_FOR_STRING8(check) TEST(check) #else -# define TEST_HAS_FACET_STRING8(facet, l) (void)0 +# define TEST_FOR_STRING8(check) (void)0 #endif #ifdef BOOST_LOCALE_ENABLE_CHAR16_T -# define TEST_HAS_FACET_CHAR16(facet, l) TEST(blt::has_facet>(l)) +# define TEST_FOR_CHAR16(check) TEST(check) #else -# define TEST_HAS_FACET_CHAR16(facet, l) (void)0 +# define TEST_FOR_CHAR16(check) (void)0 #endif #ifdef BOOST_LOCALE_ENABLE_CHAR32_T -# define TEST_HAS_FACET_CHAR32(facet, l) TEST(blt::has_facet>(l)) +# define TEST_FOR_CHAR32(check) TEST(check) #else -# define TEST_HAS_FACET_CHAR32(facet, l) (void)0 +# define TEST_FOR_CHAR32(check) (void)0 #endif +#define TEST_HAS_FACET_CHAR8(facet, l) TEST_FOR_CHAR8(blt::has_facet>(l)) +#define TEST_HAS_FACET_CHAR16(facet, l) TEST_FOR_CHAR16(blt::has_facet>(l)) +#define TEST_HAS_FACET_CHAR32(facet, l) TEST_FOR_CHAR32(blt::has_facet>(l)) +#define TEST_HAS_FACET_STRING8(facet, l) TEST_FOR_STRING8(blt::has_facet>(l)) + #define TEST_HAS_FACETS(facet, l) \ do { \ TEST(blt::has_facet>(l)); \ @@ -296,7 +316,16 @@ void test_main(int /*argc*/, char** /*argv*/) // Convert TEST_HAS_FACETS(bl::converter, l); TEST_HAS_FACET_STRING8(bl::converter, l); + // Collator TEST_HAS_FACETS(std::collate, l); + if(backendName == "icu" || (backendName == "winapi" && std::use_facet(l).utf8())) { + TEST_HAS_FACETS(bl::collator, l); + } else { + TEST(blt::has_not_facet>(l)); + TEST(blt::has_not_facet>(l)); + TEST_FOR_CHAR16(blt::has_not_facet>(l)); + TEST_FOR_CHAR32(blt::has_not_facet>(l)); + } // Formatting TEST_HAS_FACETS(std::num_put, l); TEST_HAS_FACETS(std::time_put, l); From 9660cb970aa4a869ebaf3ba2efdfadeda4c0012c Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 7 Feb 2024 17:43:13 +0100 Subject: [PATCH 02/10] Fix forwarding of variadic template parameter pack --- src/boost/locale/shared/mo_lambda.cpp | 2 +- test/test_message.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/boost/locale/shared/mo_lambda.cpp b/src/boost/locale/shared/mo_lambda.cpp index a6d2eb6a..7b37ca91 100644 --- a/src/boost/locale/shared/mo_lambda.cpp +++ b/src/boost/locale/shared/mo_lambda.cpp @@ -22,7 +22,7 @@ namespace boost { namespace locale { namespace gnu_gettext { namespace lambda { namespace { // anon template - expr_ptr make_expr(Ts... ts) + expr_ptr make_expr(Ts&&... ts) { return expr_ptr(new TExp(std::forward(ts)...)); } diff --git a/test/test_message.cpp b/test/test_message.cpp index 19323761..304d66c7 100644 --- a/test/test_message.cpp +++ b/test/test_message.cpp @@ -115,7 +115,7 @@ template using has_ctype = decltype(is_complete_impl(std::declval*>())); template -typename std::enable_if::value, bool>::type stream_translate(std::basic_ostream& ss, Ts... args) +typename std::enable_if::value, bool>::type stream_translate(std::basic_ostream& ss, Ts&&... args) { ss << bl::translate(args...); return true; @@ -124,7 +124,7 @@ typename std::enable_if::value, bool>::type stream_translate(std // Required for char types not fully supported by the standard library // e.g.: error: implicit instantiation of undefined template 'std::ctype' template -typename std::enable_if::value, bool>::type stream_translate(std::basic_ostream&, Ts...) +typename std::enable_if::value, bool>::type stream_translate(std::basic_ostream&, Ts&&...) { return false; // LCOV_EXCL_LINE } From 7ab4021585bf99a140f268cf2eaf386ad1526e64 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 7 Feb 2024 17:56:17 +0100 Subject: [PATCH 03/10] ICU: Make get_collator return a reference instead of pointer The value is always non-NULL so return a reference instead. --- src/boost/locale/icu/collator.cpp | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/boost/locale/icu/collator.cpp b/src/boost/locale/icu/collator.cpp index 86f5e51e..cd5a86a9 100644 --- a/src/boost/locale/icu/collator.cpp +++ b/src/boost/locale/icu/collator.cpp @@ -13,6 +13,7 @@ #include "boost/locale/shared/mo_hash.hpp" #include #include +#include #include #include #if BOOST_LOCALE_ICU_VERSION >= 402 @@ -51,7 +52,7 @@ namespace boost { namespace locale { namespace impl_icu { { icu::StringPiece left(b1, e1 - b1); icu::StringPiece right(b2, e2 - b2); - return get_collator(level)->compareUTF8(left, right, status); + return get_collator(level).compareUTF8(left, right, status); } #endif @@ -64,7 +65,7 @@ namespace boost { namespace locale { namespace impl_icu { { icu::UnicodeString left = cvt_.icu(b1, e1); icu::UnicodeString right = cvt_.icu(b2, e2); - return get_collator(level)->compare(left, right, status); + return get_collator(level).compare(left, right, status); } int do_real_compare(collate_level level, @@ -101,11 +102,11 @@ namespace boost { namespace locale { namespace impl_icu { icu::UnicodeString str = cvt_.icu(b, e); std::vector tmp; tmp.resize(str.length() + 1u); - icu::Collator* collate = get_collator(level); - const int len = collate->getSortKey(str, tmp.data(), tmp.size()); + icu::Collator& collate = get_collator(level); + const int len = collate.getSortKey(str, tmp.data(), tmp.size()); if(len > int(tmp.size())) { tmp.resize(len); - collate->getSortKey(str, tmp.data(), tmp.size()); + collate.getSortKey(str, tmp.data(), tmp.size()); } else tmp.resize(len); return tmp; @@ -126,7 +127,7 @@ namespace boost { namespace locale { namespace impl_icu { collate_impl(const cdata& d) : cvt_(d.encoding()), locale_(d.locale()), is_utf8_(d.is_utf8()) {} - icu::Collator* get_collator(collate_level level) const + icu::Collator& get_collator(collate_level level) const { const int lvl_idx = level_to_int(level); constexpr icu::Collator::ECollationStrength levels[level_count] = {icu::Collator::PRIMARY, @@ -136,18 +137,17 @@ namespace boost { namespace locale { namespace impl_icu { icu::Collator::IDENTICAL}; icu::Collator* col = collates_[lvl_idx].get(); - if(col) - return col; - - UErrorCode status = U_ZERO_ERROR; - - collates_[lvl_idx].reset(icu::Collator::createInstance(locale_, status)); - - if(U_FAILURE(status)) - throw std::runtime_error(std::string("Creation of collate failed:") + u_errorName(status)); - - collates_[lvl_idx]->setStrength(levels[lvl_idx]); - return collates_[lvl_idx].get(); + if(!col) { + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr tmp_col(icu::Collator::createInstance(locale_, status)); + if(U_FAILURE(status)) + throw std::runtime_error(std::string("Creation of collate failed:") + u_errorName(status)); + + tmp_col->setStrength(levels[lvl_idx]); + col = tmp_col.release(); + collates_[lvl_idx].reset(col); + } + return *col; } private: From 4fc8ed6458645b566465d29509b2893945df7908 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 7 Feb 2024 18:05:50 +0100 Subject: [PATCH 04/10] Fix documentation of `collator` The default level is "identical" not "primary". Also fix some typos in the documentation. --- include/boost/locale/collator.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/boost/locale/collator.hpp b/include/boost/locale/collator.hpp index b45f1547..d39f329c 100644 --- a/include/boost/locale/collator.hpp +++ b/include/boost/locale/collator.hpp @@ -53,7 +53,7 @@ namespace boost { namespace locale { /// Type of string used with this facet typedef std::basic_string string_type; - /// Compare two strings in rage [b1,e1), [b2,e2) according using a collation level \a level. Calls do_compare + /// Compare two strings in range [b1,e1), [b2,e2) according to collation level \a level. Calls do_compare /// /// Returns -1 if the first of the two strings sorts before the seconds, returns 1 if sorts after and 0 if /// they considered equal. @@ -107,7 +107,7 @@ namespace boost { namespace locale { /// Create a binary string from string \a s, that can be compared to other, useful for collation of multiple /// strings. /// - /// The transformation follows these rules: + /// The transformation follows this rule: /// \code /// compare(level,s1,s2) == sign( transform(level,s1).compare(transform(level,s2)) ); /// \endcode @@ -121,7 +121,7 @@ namespace boost { namespace locale { collator(size_t refs = 0) : std::collate(refs) {} /// This function is used to override default collation function that does not take in account collation level. - /// Uses primary level + /// Uses identical level int do_compare(const char_type* b1, const char_type* e1, const char_type* b2, const char_type* e2) const override { @@ -129,14 +129,14 @@ namespace boost { namespace locale { } /// This function is used to override default collation function that does not take in account collation level. - /// Uses primary level + /// Uses identical level string_type do_transform(const char_type* b, const char_type* e) const override { return do_transform(collate_level::identical, b, e); } /// This function is used to override default collation function that does not take in account collation level. - /// Uses primary level + /// Uses identical level long do_hash(const char_type* b, const char_type* e) const override { return do_hash(collate_level::identical, b, e); @@ -157,7 +157,7 @@ namespace boost { namespace locale { }; /// \brief This class can be used in STL algorithms and containers for comparison of strings - /// with a level other than primary + /// with a level other than identical /// /// For example: /// @@ -169,7 +169,7 @@ namespace boost { namespace locale { template struct comparator { public: - /// Create a comparator class for locale \a l and with collation leval \a level + /// Create a comparator class for locale \a l and with collation level \a level /// /// \throws std::bad_cast: \a l does not have \ref collator facet installed comparator(const std::locale& l = std::locale(), collate_level level = default_level) : From f3be6c8f0b6e1b4cbdd8a850033a6a1d3ce38028 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 7 Feb 2024 18:06:34 +0100 Subject: [PATCH 05/10] Fix `comparator` throwing behavior As documented this should throw on construction not on use. --- include/boost/locale/collator.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/boost/locale/collator.hpp b/include/boost/locale/collator.hpp index d39f329c..e810c0b0 100644 --- a/include/boost/locale/collator.hpp +++ b/include/boost/locale/collator.hpp @@ -173,17 +173,18 @@ namespace boost { namespace locale { /// /// \throws std::bad_cast: \a l does not have \ref collator facet installed comparator(const std::locale& l = std::locale(), collate_level level = default_level) : - locale_(l), level_(level) + locale_(l), collator_(std::use_facet>(locale_)), level_(level) {} /// Compare two strings -- equivalent to return left < right according to collation rules bool operator()(const std::basic_string& left, const std::basic_string& right) const { - return std::use_facet>(locale_).compare(level_, left, right) < 0; + return collator_.compare(level_, left, right) < 0; } private: std::locale locale_; + const collator& collator_; collate_level level_; }; From 888d89b69fa3309d80251eeea99fcc0c542fc9a5 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 7 Feb 2024 19:56:28 +0100 Subject: [PATCH 06/10] Make winlocale a struct with ctor from ID --- src/boost/locale/win32/all_generator.hpp | 2 +- src/boost/locale/win32/api.hpp | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/boost/locale/win32/all_generator.hpp b/src/boost/locale/win32/all_generator.hpp index c175d5be..3cf7da94 100644 --- a/src/boost/locale/win32/all_generator.hpp +++ b/src/boost/locale/win32/all_generator.hpp @@ -12,7 +12,7 @@ namespace boost { namespace locale { namespace impl_win { - class winlocale; + struct winlocale; std::locale create_convert(const std::locale& in, const winlocale& lc, char_facet_t type); diff --git a/src/boost/locale/win32/api.hpp b/src/boost/locale/win32/api.hpp index aeda7ae6..d3254f9a 100644 --- a/src/boost/locale/win32/api.hpp +++ b/src/boost/locale/win32/api.hpp @@ -48,15 +48,13 @@ namespace boost { namespace locale { namespace impl_win { return 0; } - class winlocale { - public: - winlocale() : lcid(0) {} + struct winlocale { + explicit winlocale(unsigned locale_id = 0) : lcid(locale_id) {} + explicit winlocale(const std::string& name) { lcid = locale_to_lcid(name); } - winlocale(const std::string& name) { lcid = locale_to_lcid(name); } + bool is_c() const { return lcid == 0; } unsigned lcid; - - bool is_c() const { return lcid == 0; } }; //////////////////////////////////////////////////////////////////////// @@ -195,8 +193,7 @@ namespace boost { namespace locale { namespace impl_win { inline std::wstring wcsfold(const wchar_t* begin, const wchar_t* end) { - winlocale l; - l.lcid = 0x0409; // en-US + const winlocale l(0x0409); // en-US return win_map_string_l(LCMAP_LOWERCASE, begin, end, l); } From 6eec46bcfa2162e6c9ea5d9b782b735fd8034b8a Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 7 Feb 2024 19:57:24 +0100 Subject: [PATCH 07/10] Cleanup win32/api.hpp - Remove single-use local variables - Use nullptr instead of 0 - Assert return value --- src/boost/locale/win32/api.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/boost/locale/win32/api.hpp b/src/boost/locale/win32/api.hpp index d3254f9a..2885026c 100644 --- a/src/boost/locale/win32/api.hpp +++ b/src/boost/locale/win32/api.hpp @@ -27,6 +27,7 @@ #include #include +#include namespace boost { namespace locale { namespace impl_win { @@ -147,6 +148,7 @@ namespace boost { namespace locale { namespace impl_win { static_cast(le - lb), rb, static_cast(re - rb)); + BOOST_ASSERT_MSG(result != 0, "CompareStringW failed"); return result - 2; // Subtract 2 to get the meaning of <0, ==0, and >0 } @@ -211,7 +213,7 @@ namespace boost { namespace locale { namespace impl_win { if(end - begin > std::numeric_limits::max()) throw std::length_error("String to long for int type"); - int len = FoldStringW(flags, begin, static_cast(end - begin), 0, 0); + int len = FoldStringW(flags, begin, static_cast(end - begin), nullptr, 0); if(len == 0) return std::wstring(); if(len == std::numeric_limits::max()) @@ -223,9 +225,7 @@ namespace boost { namespace locale { namespace impl_win { inline std::wstring wcsxfrm_l(collate_level level, const wchar_t* begin, const wchar_t* end, const winlocale& l) { - int flag = LCMAP_SORTKEY | collation_level_to_flag(level); - - return win_map_string_l(flag, begin, end, l); + return win_map_string_l(LCMAP_SORTKEY | collation_level_to_flag(level), begin, end, l); } inline std::wstring towupper_l(const wchar_t* begin, const wchar_t* end, const winlocale& l) From f29d950d80ad62d56a2056f4effe9a62e1e4c92e Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 7 Feb 2024 20:02:06 +0100 Subject: [PATCH 08/10] Fix type confusion with `boost::locale::collator` The class derived from `std::collate` which is always present in `std::locale`. So checks for the `boost::locale::collator` facet may return wrongly true. As a fix make this an independent facet with its own ID. Use an adapter such that a std::collate derived class can use its abilities. --- CMakeLists.txt | 1 + doc/changelog.txt | 5 +- include/boost/locale/collator.hpp | 48 +++---- src/boost/locale/icu/collator.cpp | 13 +- src/boost/locale/shared/ids.cpp | 3 + .../locale/shared/std_collate_adapter.hpp | 58 ++++++++ src/boost/locale/win32/collate.cpp | 134 ++++++++++++------ test/test_generator.cpp | 46 ++++++ 8 files changed, 235 insertions(+), 73 deletions(-) create mode 100644 src/boost/locale/shared/std_collate_adapter.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index eae5fcf2..f47ad394 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ add_library(boost_locale src/boost/locale/shared/localization_backend.cpp src/boost/locale/shared/message.cpp src/boost/locale/shared/mo_lambda.cpp + src/boost/locale/shared/std_collate_adapter.hpp src/boost/locale/util/codecvt_converter.cpp src/boost/locale/util/default_locale.cpp src/boost/locale/util/encoding.cpp diff --git a/doc/changelog.txt b/doc/changelog.txt index 267a5fc7..4d539b50 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -1,6 +1,6 @@ // // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) -// Copyright (c) 2021-2023 Alexander Grund +// Copyright (c) 2021-2024 Alexander Grund // // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt @@ -8,6 +8,9 @@ /*! \page changelog Changelog +- 1.85.0 + - Breaking changes + - `collator` does no longer derive from `std::collator` avoiding possible type confusion - 1.84.0 - Breaking changes - `to_title` for the WinAPI backend returns the string unchanged instead of an empty string diff --git a/include/boost/locale/collator.hpp b/include/boost/locale/collator.hpp index e810c0b0..693d33a6 100644 --- a/include/boost/locale/collator.hpp +++ b/include/boost/locale/collator.hpp @@ -8,6 +8,7 @@ #define BOOST_LOCALE_COLLATOR_HPP_INCLUDED #include +#include #include #ifdef BOOST_MSVC @@ -43,10 +44,9 @@ namespace boost { namespace locale { /// \brief Collation facet. /// - /// It reimplements standard C++ std::collate, - /// allowing usage of std::locale for direct string comparison + /// It reimplements standard C++ std::collate with support for collation levels template - class collator : public std::collate { + class BOOST_SYMBOL_VISIBLE collator : public std::locale::facet, public detail::facet_id> { public: /// Type of the underlying character typedef CharType char_type; @@ -66,6 +66,13 @@ namespace boost { namespace locale { return do_compare(level, b1, e1, b2, e2); } + /// Default compare function as-in std::collate that does not take collation level into account. + /// Uses identical level + int compare(const char_type* b1, const char_type* e1, const char_type* b2, const char_type* e2) const + { + return compare(collate_level::identical, b1, e1, b2, e2); + } + /// Create a binary string that can be compared to other in order to get collation order. The string is created /// for text in range [b,e). It is useful for collation of multiple strings for text. /// @@ -80,6 +87,13 @@ namespace boost { namespace locale { return do_transform(level, b, e); } + /// Default transform function as-in std::collate that does not take collation level into account. + /// Uses identical level + string_type transform(const char_type* b, const char_type* e) const + { + return transform(collate_level::identical, b, e); + } + /// Calculate a hash of a text in range [b,e). The value can be used for collation sensitive string comparison. /// /// If compare(level,b1,e1,b2,e2) == 0 then hash(level,b1,e1) == hash(level,b2,e2) @@ -87,6 +101,10 @@ namespace boost { namespace locale { /// Calls do_hash long hash(collate_level level, const char_type* b, const char_type* e) const { return do_hash(level, b, e); } + /// Default hash function as-in std::collate that does not take collation level into account. + /// Uses identical level + long hash(const char_type* b, const char_type* e) const { return hash(collate_level::identical, b, e); } + /// Compare two strings \a l and \a r using collation level \a level /// /// Returns -1 if the first of the two strings sorts before the seconds, returns 1 if sorts after and 0 if @@ -118,29 +136,7 @@ namespace boost { namespace locale { protected: /// constructor of the collator object - collator(size_t refs = 0) : std::collate(refs) {} - - /// This function is used to override default collation function that does not take in account collation level. - /// Uses identical level - int - do_compare(const char_type* b1, const char_type* e1, const char_type* b2, const char_type* e2) const override - { - return do_compare(collate_level::identical, b1, e1, b2, e2); - } - - /// This function is used to override default collation function that does not take in account collation level. - /// Uses identical level - string_type do_transform(const char_type* b, const char_type* e) const override - { - return do_transform(collate_level::identical, b, e); - } - - /// This function is used to override default collation function that does not take in account collation level. - /// Uses identical level - long do_hash(const char_type* b, const char_type* e) const override - { - return do_hash(collate_level::identical, b, e); - } + collator(size_t refs = 0) : std::locale::facet(refs) {} /// Actual function that performs comparison between the strings. For details see compare member function. Can /// be overridden. diff --git a/src/boost/locale/icu/collator.cpp b/src/boost/locale/icu/collator.cpp index cd5a86a9..7852ea31 100644 --- a/src/boost/locale/icu/collator.cpp +++ b/src/boost/locale/icu/collator.cpp @@ -11,6 +11,7 @@ #include "boost/locale/icu/icu_util.hpp" #include "boost/locale/icu/uconv.hpp" #include "boost/locale/shared/mo_hash.hpp" +#include "boost/locale/shared/std_collate_adapter.hpp" #include #include #include @@ -173,21 +174,21 @@ namespace boost { namespace locale { namespace impl_icu { return do_ustring_compare(level, b1, e1, b2, e2, status); } #endif - std::locale create_collate(const std::locale& in, const cdata& cd, char_facet_t type) { switch(type) { case char_facet_t::nochar: break; - case char_facet_t::char_f: return std::locale(in, new collate_impl(cd)); - case char_facet_t::wchar_f: return std::locale(in, new collate_impl(cd)); + case char_facet_t::char_f: return impl::create_collators(in, cd); + case char_facet_t::wchar_f: return impl::create_collators(in, cd); #ifdef __cpp_char8_t - case char_facet_t::char8_f: break; // std-facet not available (yet) + case char_facet_t::char8_f: + return std::locale(in, new collate_impl(cd)); // std-facet not available (yet) #endif #ifdef BOOST_LOCALE_ENABLE_CHAR16_T - case char_facet_t::char16_f: return std::locale(in, new collate_impl(cd)); + case char_facet_t::char16_f: : return impl::create_collators(in, cd); #endif #ifdef BOOST_LOCALE_ENABLE_CHAR32_T - case char_facet_t::char32_f: return std::locale(in, new collate_impl(cd)); + case char_facet_t::char32_f: : return impl::create_collators(in, cd); #endif } return in; diff --git a/src/boost/locale/shared/ids.cpp b/src/boost/locale/shared/ids.cpp index e295f214..819de7f9 100644 --- a/src/boost/locale/shared/ids.cpp +++ b/src/boost/locale/shared/ids.cpp @@ -6,6 +6,7 @@ // https://www.boost.org/LICENSE_1_0.txt #include +#include #include #include #include @@ -24,6 +25,7 @@ namespace boost { namespace locale { BOOST_LOCALE_DEFINE_ID(calendar_facet); #define BOOST_LOCALE_INSTANTIATE(CHARTYPE) \ + BOOST_LOCALE_DEFINE_ID(collator); \ BOOST_LOCALE_DEFINE_ID(converter); \ BOOST_LOCALE_DEFINE_ID(message_format); \ BOOST_LOCALE_DEFINE_ID(boundary::boundary_indexing); @@ -48,6 +50,7 @@ namespace boost { namespace locale { void init_by(const std::locale& l) { init_facet>(l); + init_facet>(l); init_facet>(l); init_facet>(l); } diff --git a/src/boost/locale/shared/std_collate_adapter.hpp b/src/boost/locale/shared/std_collate_adapter.hpp new file mode 100644 index 00000000..ee4ff43b --- /dev/null +++ b/src/boost/locale/shared/std_collate_adapter.hpp @@ -0,0 +1,58 @@ +// +// Copyright (c) 2024 Alexander Grund +// +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_LOCALE_STD_COLLATE_ADAPTER_HPP +#define BOOST_LOCALE_STD_COLLATE_ADAPTER_HPP + +#include +#include +#include + +namespace boost { namespace locale { namespace impl { + + template + class BOOST_SYMBOL_VISIBLE std_collate_adapter : public std::collate { + public: + using typename std::collate::string_type; + + template + explicit std_collate_adapter(TArgs&&... args) : base_(std::forward(args)...) + {} + + protected: + int do_compare(const CharT* beg1, const CharT* end1, const CharT* beg2, const CharT* end2) const override + { + return base_.compare(collate_level::identical, beg1, end1, beg2, end2); + } + + string_type do_transform(const CharT* beg, const CharT* end) const override + { + return base_.transform(collate_level::identical, beg, end); + } + long do_hash(const CharT* beg, const CharT* end) const override + { + return base_.hash(collate_level::identical, beg, end); + } + Base base_; + }; + + template + static std::locale create_collators(const std::locale& in, TArgs&&... args) + { + static_assert(std::is_base_of, CollatorImpl>::value, "Must be a collator implementation"); + std::locale res(in, new CollatorImpl(args...)); + return std::locale(res, new std_collate_adapter(args...)); + } + + template class CollatorImpl, typename... TArgs> + static std::locale create_collators(const std::locale& in, TArgs&&... args) + { + return create_collators>(in, args...); + } + +}}} // namespace boost::locale::impl + +#endif diff --git a/src/boost/locale/win32/collate.cpp b/src/boost/locale/win32/collate.cpp index 107fed08..16fe38d1 100644 --- a/src/boost/locale/win32/collate.cpp +++ b/src/boost/locale/win32/collate.cpp @@ -7,33 +7,71 @@ #include #include #include "boost/locale/shared/mo_hash.hpp" +#include "boost/locale/shared/std_collate_adapter.hpp" #include "boost/locale/win32/api.hpp" #include #include #include +#include namespace boost { namespace locale { namespace impl_win { + template + using enable_if_sizeof_wchar_t = typename std::enable_if::type; + template + using disable_if_sizeof_wchar_t = typename std::enable_if::type; - class utf8_collator : public collator { - public: - utf8_collator(winlocale lc, size_t refs = 0) : collator(refs), lc_(lc) {} - int - do_compare(collate_level level, const char* lb, const char* le, const char* rb, const char* re) const override + namespace { + template + enable_if_sizeof_wchar_t compare_impl(collate_level level, + const CharT* lb, + const CharT* le, + const CharT* rb, + const CharT* re, + const winlocale& wl) + { + return wcscoll_l(level, + reinterpret_cast(lb), + reinterpret_cast(le), + reinterpret_cast(rb), + reinterpret_cast(re), + wl); + } + + template + disable_if_sizeof_wchar_t compare_impl(collate_level level, + const CharT* lb, + const CharT* le, + const CharT* rb, + const CharT* re, + const winlocale& wl) { const std::wstring l = conv::utf_to_utf(lb, le); const std::wstring r = conv::utf_to_utf(rb, re); - return wcscoll_l(level, l.c_str(), l.c_str() + l.size(), r.c_str(), r.c_str() + r.size(), lc_); + return wcscoll_l(level, l.c_str(), l.c_str() + l.size(), r.c_str(), r.c_str() + r.size(), wl); } - long do_hash(collate_level level, const char* b, const char* e) const override + + template + enable_if_sizeof_wchar_t + normalize_impl(collate_level level, const CharT* b, const CharT* e, const winlocale& l) { - const std::string key = do_transform(level, b, e); - return gnu_gettext::pj_winberger_hash_function(key.c_str(), key.c_str() + key.size()); + return wcsxfrm_l(level, reinterpret_cast(b), reinterpret_cast(e), l); } - std::string do_transform(collate_level level, const char* b, const char* e) const override + + template + disable_if_sizeof_wchar_t + normalize_impl(collate_level level, const CharT* b, const CharT* e, const winlocale& l) { const std::wstring tmp = conv::utf_to_utf(b, e); - const std::wstring wkey = wcsxfrm_l(level, tmp.c_str(), tmp.c_str() + tmp.size(), lc_); - std::string key; + return wcsxfrm_l(level, tmp.c_str(), tmp.c_str() + tmp.size(), l); + } + + template + typename std::enable_if>::type + transform_impl(collate_level level, const CharT* b, const CharT* e, const winlocale& l) + { + const std::wstring wkey = normalize_impl(level, b, e, l); + + std::basic_string key; BOOST_LOCALE_START_CONST_CONDITION if(sizeof(wchar_t) == 2) key.reserve(wkey.size() * 2); @@ -42,46 +80,61 @@ namespace boost { namespace locale { namespace impl_win { for(const wchar_t c : wkey) { if(sizeof(wchar_t) == 2) { const uint16_t tv = static_cast(c); - key += char(tv >> 8); - key += char(tv & 0xFF); + key += CharT(tv >> 8); + key += CharT(tv & 0xFF); } else { // 4 const uint32_t tv = static_cast(c); // 21 bit - key += char((tv >> 16) & 0xFF); - key += char((tv >> 8) & 0xFF); - key += char(tv & 0xFF); + key += CharT((tv >> 16) & 0xFF); + key += CharT((tv >> 8) & 0xFF); + key += CharT(tv & 0xFF); } } BOOST_LOCALE_END_CONST_CONDITION return key; } - private: - winlocale lc_; - }; + template + typename std::enable_if::value, std::wstring>::type + transform_impl(collate_level level, const CharT* b, const CharT* e, const winlocale& l) + { + return normalize_impl(level, b, e, l); + } + + template + typename std::enable_if= sizeof(wchar_t) && !std::is_same::value, + std::basic_string>::type + transform_impl(collate_level level, const CharT* b, const CharT* e, const winlocale& l) + { + const std::wstring wkey = normalize_impl(level, b, e, l); + return std::basic_string(wkey.begin(), wkey.end()); + } + } // namespace - class utf16_collator : public collator { + template + class utf_collator : public collator { public: - typedef std::collate wfacet; - utf16_collator(winlocale lc, size_t refs = 0) : collator(refs), lc_(lc) {} + using typename collator::string_type; + + explicit utf_collator(winlocale lc) : collator(), lc_(lc) {} + int do_compare(collate_level level, - const wchar_t* lb, - const wchar_t* le, - const wchar_t* rb, - const wchar_t* re) const override + const CharT* lb, + const CharT* le, + const CharT* rb, + const CharT* re) const override { - return wcscoll_l(level, lb, le, rb, re, lc_); + return compare_impl(level, lb, le, rb, re, lc_); } - long do_hash(collate_level level, const wchar_t* b, const wchar_t* e) const override + long do_hash(collate_level level, const CharT* b, const CharT* e) const override { - std::wstring key = do_transform(level, b, e); - const char* begin = reinterpret_cast(key.c_str()); - const char* end = begin + key.size() * sizeof(wchar_t); - return gnu_gettext::pj_winberger_hash_function(begin, end); + const std::wstring key = normalize_impl(level, b, e, lc_); + return gnu_gettext::pj_winberger_hash_function(reinterpret_cast(key.c_str()), + reinterpret_cast(key.c_str() + key.size())); } - std::wstring do_transform(collate_level level, const wchar_t* b, const wchar_t* e) const override + string_type do_transform(collate_level level, const CharT* b, const CharT* e) const override { - return wcsxfrm_l(level, b, e, lc_); + return transform_impl(level, b, e, lc_); } private: @@ -108,16 +161,17 @@ namespace boost { namespace locale { namespace impl_win { } else { switch(type) { case char_facet_t::nochar: break; - case char_facet_t::char_f: return std::locale(in, new utf8_collator(lc)); - case char_facet_t::wchar_f: return std::locale(in, new utf16_collator(lc)); + case char_facet_t::char_f: return impl::create_collators>(in, lc); + case char_facet_t::wchar_f: return impl::create_collators>(in, lc); #ifdef __cpp_char8_t - case char_facet_t::char8_f: break; // std-facet not available (yet) + case char_facet_t::char8_f: + return std::locale(in, new utf_collator(lc)); // std-facet not available (yet) #endif #ifdef BOOST_LOCALE_ENABLE_CHAR16_T - case char_facet_t::char16_f: break; + case char_facet_t::char16_f: return impl::create_collators>(in, lc); #endif #ifdef BOOST_LOCALE_ENABLE_CHAR32_T - case char_facet_t::char32_f: break; + case char_facet_t::char32_f: return impl::create_collators>(in, lc); #endif } } diff --git a/test/test_generator.cpp b/test/test_generator.cpp index 0de07c97..084101bf 100644 --- a/test/test_generator.cpp +++ b/test/test_generator.cpp @@ -247,6 +247,49 @@ void test_install_chartype(const std::string& backendName) } } +template +struct dummy_collate : std::collate {}; + +template +bool has_dummy_collate(const std::locale& l) +{ + const auto& col = std::use_facet>(l); // Implicitely require existance of std::collate + return blt::is_facet>(&col); +} + +void test_std_collate_replaced(const std::string& /*backendName*/) +{ + std::locale origLocale = std::locale::classic(); + origLocale = std::locale(origLocale, new dummy_collate); + origLocale = std::locale(origLocale, new dummy_collate); +#ifdef BOOST_LOCALE_ENABLE_CHAR16_T + origLocale = std::locale(origLocale, new dummy_collate); +#endif +#ifdef BOOST_LOCALE_ENABLE_CHAR32_T + origLocale = std::locale(origLocale, new dummy_collate); +#endif + + // Use ASCII and UTF-8 encoding + for(const std::string localeName : {"C", "en_US.UTF-8"}) { + std::cout << "--- Locale: " << localeName << std::endl; + bl::generator g; + g.categories(boost::locale::category_t::collation); + const std::locale l = g.generate(origLocale, localeName); + TEST(has_dummy_collate(origLocale)); + TEST(!has_dummy_collate(l)); + TEST(has_dummy_collate(origLocale)); + TEST(!has_dummy_collate(l)); +#ifdef BOOST_LOCALE_ENABLE_CHAR16_T + TEST(has_dummy_collate(origLocale)); + TEST(!has_dummy_collate(l)); +#endif +#ifdef BOOST_LOCALE_ENABLE_CHAR32_T + TEST(has_dummy_collate(origLocale)); + TEST(!has_dummy_collate(l)); +#endif + } +} + void test_main(int /*argc*/, char** /*argv*/) { { @@ -320,9 +363,11 @@ void test_main(int /*argc*/, char** /*argv*/) TEST_HAS_FACETS(std::collate, l); if(backendName == "icu" || (backendName == "winapi" && std::use_facet(l).utf8())) { TEST_HAS_FACETS(bl::collator, l); + TEST_HAS_FACET_STRING8(bl::collator, l); } else { TEST(blt::has_not_facet>(l)); TEST(blt::has_not_facet>(l)); + TEST_FOR_STRING8(blt::has_not_facet>(l)); TEST_FOR_CHAR16(blt::has_not_facet>(l)); TEST_FOR_CHAR32(blt::has_not_facet>(l)); } @@ -425,6 +470,7 @@ void test_main(int /*argc*/, char** /*argv*/) TEST(!std::use_facet(g("en_US.ISO8859-1")).utf8()); test_install_chartype(backendName); + test_std_collate_replaced(backendName); } std::cout << "Test special locales" << std::endl; test_special_locales(); From dffe034cc21e9b268bf12481f8e7c2246a2984d4 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 8 Feb 2024 12:38:22 +0100 Subject: [PATCH 09/10] Remove superflous colons --- src/boost/locale/icu/collator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/boost/locale/icu/collator.cpp b/src/boost/locale/icu/collator.cpp index 7852ea31..72ade74f 100644 --- a/src/boost/locale/icu/collator.cpp +++ b/src/boost/locale/icu/collator.cpp @@ -185,10 +185,10 @@ namespace boost { namespace locale { namespace impl_icu { return std::locale(in, new collate_impl(cd)); // std-facet not available (yet) #endif #ifdef BOOST_LOCALE_ENABLE_CHAR16_T - case char_facet_t::char16_f: : return impl::create_collators(in, cd); + case char_facet_t::char16_f: return impl::create_collators(in, cd); #endif #ifdef BOOST_LOCALE_ENABLE_CHAR32_T - case char_facet_t::char32_f: : return impl::create_collators(in, cd); + case char_facet_t::char32_f: return impl::create_collators(in, cd); #endif } return in; From 1bd9b6cbe77967867de91f404b36172ecd6a8679 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 8 Feb 2024 12:43:10 +0100 Subject: [PATCH 10/10] coverage: Exclude unreachable lines in win32/api.hpp --- src/boost/locale/win32/api.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/boost/locale/win32/api.hpp b/src/boost/locale/win32/api.hpp index 2885026c..33791ef9 100644 --- a/src/boost/locale/win32/api.hpp +++ b/src/boost/locale/win32/api.hpp @@ -46,7 +46,7 @@ namespace boost { namespace locale { namespace impl_win { case collate_level::quaternary: case collate_level::identical: return 0; } - return 0; + return 0; // LCOV_EXCL_LINE } struct winlocale { @@ -86,7 +86,7 @@ namespace boost { namespace locale { namespace impl_win { || GetLocaleInfoW(lcid, LOCALE_SDECIMAL, de, de_size) == 0 || GetLocaleInfoW(lcid, LOCALE_SGROUPING, gr, gr_size) == 0) { - return res; + return res; // LCOV_EXCL_LINE } res.decimal_point = de; res.thousands_sep = th; @@ -117,7 +117,7 @@ namespace boost { namespace locale { namespace impl_win { throw std::length_error("String to long for int type"); int len = LCMapStringW(l.lcid, flags, begin, static_cast(end - begin), 0, 0); if(len == 0) - return res; + return res; // LCOV_EXCL_LINE if(len == std::numeric_limits::max()) throw std::length_error("String to long for int type"); std::vector buf(len + 1); @@ -215,7 +215,7 @@ namespace boost { namespace locale { namespace impl_win { throw std::length_error("String to long for int type"); int len = FoldStringW(flags, begin, static_cast(end - begin), nullptr, 0); if(len == 0) - return std::wstring(); + return std::wstring(); // LCOV_EXCL_LINE if(len == std::numeric_limits::max()) throw std::length_error("String to long for int type"); std::vector v(len + 1);