Skip to content

Commit

Permalink
Merge pull request #216 from Flamefire/collator-fix
Browse files Browse the repository at this point in the history
Fix possible type confusion with `boost::locale::collator`
  • Loading branch information
Flamefire committed Feb 8, 2024
2 parents 941f853 + 1bd9b6c commit d709f92
Show file tree
Hide file tree
Showing 12 changed files with 314 additions and 125 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ add_library(boost_locale
src/boost/locale/shared/localization_backend.cpp
src/boost/locale/shared/message.cpp
src/boost/locale/shared/mo_lambda.cpp
src/boost/locale/shared/std_collate_adapter.hpp
src/boost/locale/util/codecvt_converter.cpp
src/boost/locale/util/default_locale.cpp
src/boost/locale/util/encoding.cpp
Expand Down
5 changes: 4 additions & 1 deletion doc/changelog.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
// Copyright (c) 2021-2023 Alexander Grund
// Copyright (c) 2021-2024 Alexander Grund
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt

/*!
\page changelog Changelog

- 1.85.0
- Breaking changes
- `collator` no longer derives from `std::collate`, avoiding possible type confusion
- 1.84.0
- Breaking changes
- `to_title` for the WinAPI backend returns the string unchanged instead of an empty string
Expand Down
61 changes: 29 additions & 32 deletions include/boost/locale/collator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define BOOST_LOCALE_COLLATOR_HPP_INCLUDED

#include <boost/locale/config.hpp>
#include <boost/locale/detail/facet_id.hpp>
#include <locale>

#ifdef BOOST_MSVC
Expand Down Expand Up @@ -43,17 +44,16 @@ namespace boost { namespace locale {

/// \brief Collation facet.
///
/// It reimplements standard C++ std::collate,
/// allowing usage of std::locale for direct string comparison
/// It reimplements standard C++ std::collate with support for collation levels
template<typename CharType>
class collator : public std::collate<CharType> {
class BOOST_SYMBOL_VISIBLE collator : public std::locale::facet, public detail::facet_id<collator<CharType>> {
public:
/// Type of the underlying character
typedef CharType char_type;
/// Type of string used with this facet
typedef std::basic_string<CharType> string_type;

/// Compare two strings in rage [b1,e1), [b2,e2) according using a collation level \a level. Calls do_compare
/// Compare two strings in range [b1,e1), [b2,e2) according to collation level \a level. Calls do_compare
///
/// Returns -1 if the first of the two strings sorts before the second, returns 1 if it sorts after and 0 if
/// they are considered equal.
Expand All @@ -66,6 +66,13 @@ namespace boost { namespace locale {
return do_compare(level, b1, e1, b2, e2);
}

/// Compare two strings in range [b1,e1), [b2,e2) without specifying a collation level,
/// offering the same level-less signature as std::collate::compare.
///
/// Forwards to the level-aware compare overload using collate_level::identical
/// and returns its result unchanged.
int compare(const char_type* b1, const char_type* e1, const char_type* b2, const char_type* e2) const
{
return compare(collate_level::identical, b1, e1, b2, e2);
}

/// Create a binary string that can be compared to other in order to get collation order. The string is created
/// for text in range [b,e). It is useful for collation of multiple strings for text.
///
Expand All @@ -80,13 +87,24 @@ namespace boost { namespace locale {
return do_transform(level, b, e);
}

/// Create the binary sort key for the text in range [b,e) without specifying a collation level,
/// offering the same level-less signature as std::collate::transform.
///
/// Forwards to the level-aware transform overload using collate_level::identical
/// and returns its result unchanged.
string_type transform(const char_type* b, const char_type* e) const
{
return transform(collate_level::identical, b, e);
}

/// Calculate a hash of a text in range [b,e). The value can be used for collation sensitive string comparison.
///
/// If compare(level,b1,e1,b2,e2) == 0 then hash(level,b1,e1) == hash(level,b2,e2)
///
/// Calls do_hash
long hash(collate_level level, const char_type* b, const char_type* e) const { return do_hash(level, b, e); }

/// Calculate a hash of the text in range [b,e) without specifying a collation level,
/// offering the same level-less signature as std::collate::hash.
///
/// Forwards to the level-aware hash overload using collate_level::identical
/// and returns its result unchanged.
long hash(const char_type* b, const char_type* e) const { return hash(collate_level::identical, b, e); }

/// Compare two strings \a l and \a r using collation level \a level
///
/// Returns -1 if the first of the two strings sorts before the seconds, returns 1 if sorts after and 0 if
Expand All @@ -107,7 +125,7 @@ namespace boost { namespace locale {
/// Create a binary string from string \a s, that can be compared to other, useful for collation of multiple
/// strings.
///
/// The transformation follows these rules:
/// The transformation follows this rule:
/// \code
/// compare(level,s1,s2) == sign( transform(level,s1).compare(transform(level,s2)) );
/// \endcode
Expand All @@ -118,29 +136,7 @@ namespace boost { namespace locale {

protected:
/// constructor of the collator object
collator(size_t refs = 0) : std::collate<CharType>(refs) {}

/// This function is used to override default collation function that does not take in account collation level.
/// Uses primary level
int
do_compare(const char_type* b1, const char_type* e1, const char_type* b2, const char_type* e2) const override
{
return do_compare(collate_level::identical, b1, e1, b2, e2);
}

/// This function is used to override default collation function that does not take in account collation level.
/// Uses primary level
string_type do_transform(const char_type* b, const char_type* e) const override
{
return do_transform(collate_level::identical, b, e);
}

/// This function is used to override default collation function that does not take in account collation level.
/// Uses primary level
long do_hash(const char_type* b, const char_type* e) const override
{
return do_hash(collate_level::identical, b, e);
}
collator(size_t refs = 0) : std::locale::facet(refs) {}

/// Actual function that performs comparison between the strings. For details see compare member function. Can
/// be overridden.
Expand All @@ -157,7 +153,7 @@ namespace boost { namespace locale {
};

/// \brief This class can be used in STL algorithms and containers for comparison of strings
/// with a level other than primary
/// with a level other than identical
///
/// For example:
///
Expand All @@ -169,21 +165,22 @@ namespace boost { namespace locale {
template<typename CharType, collate_level default_level = collate_level::identical>
struct comparator {
public:
/// Create a comparator class for locale \a l and with collation leval \a level
/// Create a comparator class for locale \a l and with collation level \a level
///
/// \throws std::bad_cast: \a l does not have \ref collator facet installed
comparator(const std::locale& l = std::locale(), collate_level level = default_level) :
locale_(l), level_(level)
locale_(l), collator_(std::use_facet<collator<CharType>>(locale_)), level_(level)
{}

/// Compare two strings -- equivalent to return left < right according to collation rules
bool operator()(const std::basic_string<CharType>& left, const std::basic_string<CharType>& right) const
{
return std::use_facet<collator<CharType>>(locale_).compare(level_, left, right) < 0;
return collator_.compare(level_, left, right) < 0;
}

private:
std::locale locale_;
const collator<CharType>& collator_;
collate_level level_;
};

Expand Down
49 changes: 25 additions & 24 deletions src/boost/locale/icu/collator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
#include "boost/locale/icu/icu_util.hpp"
#include "boost/locale/icu/uconv.hpp"
#include "boost/locale/shared/mo_hash.hpp"
#include "boost/locale/shared/std_collate_adapter.hpp"
#include <boost/thread.hpp>
#include <limits>
#include <memory>
#include <unicode/coll.h>
#include <vector>
#if BOOST_LOCALE_ICU_VERSION >= 402
Expand Down Expand Up @@ -51,7 +53,7 @@ namespace boost { namespace locale { namespace impl_icu {
{
icu::StringPiece left(b1, e1 - b1);
icu::StringPiece right(b2, e2 - b2);
return get_collator(level)->compareUTF8(left, right, status);
return get_collator(level).compareUTF8(left, right, status);
}
#endif

Expand All @@ -64,7 +66,7 @@ namespace boost { namespace locale { namespace impl_icu {
{
icu::UnicodeString left = cvt_.icu(b1, e1);
icu::UnicodeString right = cvt_.icu(b2, e2);
return get_collator(level)->compare(left, right, status);
return get_collator(level).compare(left, right, status);
}

int do_real_compare(collate_level level,
Expand Down Expand Up @@ -101,11 +103,11 @@ namespace boost { namespace locale { namespace impl_icu {
icu::UnicodeString str = cvt_.icu(b, e);
std::vector<uint8_t> tmp;
tmp.resize(str.length() + 1u);
icu::Collator* collate = get_collator(level);
const int len = collate->getSortKey(str, tmp.data(), tmp.size());
icu::Collator& collate = get_collator(level);
const int len = collate.getSortKey(str, tmp.data(), tmp.size());
if(len > int(tmp.size())) {
tmp.resize(len);
collate->getSortKey(str, tmp.data(), tmp.size());
collate.getSortKey(str, tmp.data(), tmp.size());
} else
tmp.resize(len);
return tmp;
Expand All @@ -126,7 +128,7 @@ namespace boost { namespace locale { namespace impl_icu {

collate_impl(const cdata& d) : cvt_(d.encoding()), locale_(d.locale()), is_utf8_(d.is_utf8()) {}

icu::Collator* get_collator(collate_level level) const
icu::Collator& get_collator(collate_level level) const
{
const int lvl_idx = level_to_int(level);
constexpr icu::Collator::ECollationStrength levels[level_count] = {icu::Collator::PRIMARY,
Expand All @@ -136,18 +138,17 @@ namespace boost { namespace locale { namespace impl_icu {
icu::Collator::IDENTICAL};

icu::Collator* col = collates_[lvl_idx].get();
if(col)
return col;

UErrorCode status = U_ZERO_ERROR;

collates_[lvl_idx].reset(icu::Collator::createInstance(locale_, status));

if(U_FAILURE(status))
throw std::runtime_error(std::string("Creation of collate failed:") + u_errorName(status));

collates_[lvl_idx]->setStrength(levels[lvl_idx]);
return collates_[lvl_idx].get();
if(!col) {
UErrorCode status = U_ZERO_ERROR;
std::unique_ptr<icu::Collator> tmp_col(icu::Collator::createInstance(locale_, status));
if(U_FAILURE(status))
throw std::runtime_error(std::string("Creation of collate failed:") + u_errorName(status));

tmp_col->setStrength(levels[lvl_idx]);
col = tmp_col.release();
collates_[lvl_idx].reset(col);
}
return *col;
}

private:
Expand All @@ -173,21 +174,21 @@ namespace boost { namespace locale { namespace impl_icu {
return do_ustring_compare(level, b1, e1, b2, e2, status);
}
#endif

std::locale create_collate(const std::locale& in, const cdata& cd, char_facet_t type)
{
switch(type) {
case char_facet_t::nochar: break;
case char_facet_t::char_f: return std::locale(in, new collate_impl<char>(cd));
case char_facet_t::wchar_f: return std::locale(in, new collate_impl<wchar_t>(cd));
case char_facet_t::char_f: return impl::create_collators<char, collate_impl>(in, cd);
case char_facet_t::wchar_f: return impl::create_collators<wchar_t, collate_impl>(in, cd);
#ifdef __cpp_char8_t
case char_facet_t::char8_f: break; // std-facet not available (yet)
case char_facet_t::char8_f:
return std::locale(in, new collate_impl<char8_t>(cd)); // std-facet not available (yet)
#endif
#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
case char_facet_t::char16_f: return std::locale(in, new collate_impl<char16_t>(cd));
case char_facet_t::char16_f: return impl::create_collators<char16_t, collate_impl>(in, cd);
#endif
#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
case char_facet_t::char32_f: return std::locale(in, new collate_impl<char32_t>(cd));
case char_facet_t::char32_f: return impl::create_collators<char32_t, collate_impl>(in, cd);
#endif
}
return in;
Expand Down
3 changes: 3 additions & 0 deletions src/boost/locale/shared/ids.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
// Copyright (c) 2024 Alexander Grund
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
Expand All @@ -24,6 +25,7 @@ namespace boost { namespace locale {
BOOST_LOCALE_DEFINE_ID(calendar_facet);

#define BOOST_LOCALE_INSTANTIATE(CHARTYPE) \
BOOST_LOCALE_DEFINE_ID(collator<CHARTYPE>); \
BOOST_LOCALE_DEFINE_ID(converter<CHARTYPE>); \
BOOST_LOCALE_DEFINE_ID(message_format<CHARTYPE>); \
BOOST_LOCALE_DEFINE_ID(boundary::boundary_indexing<CHARTYPE>);
Expand All @@ -48,6 +50,7 @@ namespace boost { namespace locale {
void init_by(const std::locale& l)
{
init_facet<boundary::boundary_indexing<Char>>(l);
init_facet<collator<Char>>(l);
init_facet<converter<Char>>(l);
init_facet<message_format<Char>>(l);
}
Expand Down
2 changes: 1 addition & 1 deletion src/boost/locale/shared/mo_lambda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace boost { namespace locale { namespace gnu_gettext { namespace lambda {

namespace { // anon
template<class TExp, typename... Ts>
expr_ptr make_expr(Ts... ts)
expr_ptr make_expr(Ts&&... ts)
{
return expr_ptr(new TExp(std::forward<Ts>(ts)...));
}
Expand Down
58 changes: 58 additions & 0 deletions src/boost/locale/shared/std_collate_adapter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
//
// Copyright (c) 2024 Alexander Grund
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt

#ifndef BOOST_LOCALE_STD_COLLATE_ADAPTER_HPP
#define BOOST_LOCALE_STD_COLLATE_ADAPTER_HPP

#include <boost/locale/collator.hpp>
#include <locale>
#include <type_traits>
#include <utility>

namespace boost { namespace locale { namespace impl {

/// \brief Adapter that exposes a collator implementation through the `std::collate` facet interface.
///
/// The adapter owns its own instance of \a Base and answers the three level-less
/// `std::collate` virtual functions by delegating to that instance, always passing
/// `collate_level::identical` as the collation level.
///
/// \tparam CharT Character type of the `std::collate` facet provided by this adapter
/// \tparam Base  Type delegated to; must provide `compare`, `transform` and `hash`
///               callable with a collation level as first argument
template<typename CharT, class Base>
class BOOST_SYMBOL_VISIBLE std_collate_adapter : public std::collate<CharT> {
public:
    /// String type returned by `do_transform`, as defined by `std::collate<CharT>`
    using string_type = typename std::collate<CharT>::string_type;

    /// Construct the wrapped \a Base instance in place, forwarding every argument to its constructor
    template<typename... CtorArgs>
    explicit std_collate_adapter(CtorArgs&&... ctor_args) : base_(std::forward<CtorArgs>(ctor_args)...)
    {}

protected:
    /// `std::collate` comparison hook: compares [lhs_begin,lhs_end) with [rhs_begin,rhs_end)
    /// via the wrapped instance at identical level
    int do_compare(const CharT* lhs_begin,
                   const CharT* lhs_end,
                   const CharT* rhs_begin,
                   const CharT* rhs_end) const override
    {
        return base_.compare(collate_level::identical, lhs_begin, lhs_end, rhs_begin, rhs_end);
    }

    /// `std::collate` sort-key hook: transforms [first,last) via the wrapped instance at identical level
    string_type do_transform(const CharT* first, const CharT* last) const override
    {
        return base_.transform(collate_level::identical, first, last);
    }

    /// `std::collate` hashing hook: hashes [first,last) via the wrapped instance at identical level
    long do_hash(const CharT* first, const CharT* last) const override
    {
        return base_.hash(collate_level::identical, first, last);
    }

    /// The wrapped instance all calls are delegated to
    Base base_;
};

/// Install a collator implementation and the matching `std::collate` adapter into a copy of \a in.
///
/// Two facets are constructed from the same arguments: a \a CollatorImpl, and a
/// `std_collate_adapter<CharType, CollatorImpl>` which holds its own \a CollatorImpl instance.
///
/// \tparam CharType     Character type of the installed facets
/// \tparam CollatorImpl Concrete collator type; must derive from `collator<CharType>`
/// \param in   Locale to extend
/// \param args Constructor arguments for \a CollatorImpl
/// \return Locale based on \a in with both facets installed
template<typename CharType, class CollatorImpl, typename... TArgs>
static std::locale create_collators(const std::locale& in, TArgs&&... args)
{
    static_assert(std::is_base_of<collator<CharType>, CollatorImpl>::value, "Must be a collator implementation");
    // The arguments are needed twice, so the first construction must only see lvalues (nothing is moved from).
    std::locale res(in, new CollatorImpl(args...));
    // Last use of the arguments: forward them so rvalues are moved instead of copied a second time.
    return std::locale(res, new std_collate_adapter<CharType, CollatorImpl>(std::forward<TArgs>(args)...));
}

/// Convenience overload taking the collator as a class template parameterized on the character type.
///
/// Instantiates \a CollatorImpl with \a CharType and delegates to the overload taking a
/// concrete collator type, perfectly forwarding \a args so their value category is preserved.
///
/// \tparam CharType     Character type of the installed facets
/// \tparam CollatorImpl Collator class template, instantiated as `CollatorImpl<CharType>`
/// \param in   Locale to extend
/// \param args Constructor arguments for `CollatorImpl<CharType>`
/// \return Result of the delegated-to overload
template<typename CharType, template<typename> class CollatorImpl, typename... TArgs>
static std::locale create_collators(const std::locale& in, TArgs&&... args)
{
    return create_collators<CharType, CollatorImpl<CharType>>(in, std::forward<TArgs>(args)...);
}

}}} // namespace boost::locale::impl

#endif
2 changes: 1 addition & 1 deletion src/boost/locale/win32/all_generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

namespace boost { namespace locale { namespace impl_win {

class winlocale;
struct winlocale;

std::locale create_convert(const std::locale& in, const winlocale& lc, char_facet_t type);

Expand Down

0 comments on commit d709f92

Please sign in to comment.