Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement weighted random number generation using maps #44

Merged
merged 12 commits into from
Jul 5, 2023
201 changes: 189 additions & 12 deletions include/effolkronium/random.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@ namespace effolkronium {

namespace details {
/// Key type for getting common type numbers or objects
struct common{ };
struct common{ };

/// Key type for weighted random number generation.
/// Passed as the explicit 'Key' template argument of 'get', it selects
/// the overloads that use the mapped values of a map as weights.
struct weight{ };

/// True if type T is applicable by a std::uniform_int_distribution
template<typename T>
Expand Down Expand Up @@ -145,6 +148,15 @@ namespace effolkronium {
decltype(test<T>(0)), long>::value;
};

/// Helper behind void_t: turns any pack of well-formed types into 'void'.
/// Going through a class template makes every argument take part in
/// substitution, so an ill-formed argument is a SFINAE failure even on
/// compilers that predate the resolution of CWG issue 1558 (where unused
/// arguments of an alias template could be silently ignored).
template<typename... Types>
struct make_void { using type = void; };

/// C++11 stand-in for std::void_t
template<typename... Types>
using void_t = typename make_void<Types...>::type;

/// True if Type looks like a map. Primary template: it does not.
template<typename Type, typename = void>
struct is_map : public std::false_type {};

/// Chosen when Type declares all of the nested types
/// key_type, mapped_type and value_type
template<typename Type>
struct is_map<Type, void_t<
    typename Type::key_type,
    typename Type::mapped_type,
    typename Type::value_type>> : public std::true_type {};
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍


} // namespace details

/// Default seeder for 'random' classes
Expand Down Expand Up @@ -202,6 +214,9 @@ namespace effolkronium {
/// Key type for getting common type numbers or objects
using common = details::common;

/// Key type for weighted random number generation
/// (alias of details::weight, used as 'get<weight>( map_container )')
using weight = details::weight;

/**
* \return The minimum value
* potentially generated by the random-number engine
Expand Down Expand Up @@ -419,11 +434,11 @@ namespace effolkronium {
}

/**
* \brief Return random value from initilizer_list
* \param init_list initilizer_list with values
* \return Random value from initilizer_list
* \note Should be 1 or more elements in initilizer_list
* \note Warning! Elements in initilizer_list can't be moved:
* \brief Return random value from initializer_list
* \param init_list initializer_list with values
* \return Random value from initializer_list
* \note Should be 1 or more elements in initializer_list
* \note Warning! Elements in initializer_list can't be moved:
* https://stackoverflow.com/a/8193157/5734836
*/
template<typename T>
Expand Down Expand Up @@ -641,6 +656,87 @@ namespace effolkronium {
return dist( engine_instance( ) );
}

/**
 * \brief Return a random iterator from given map container by
 *        utilizing the values of the map container as weights
 *        for weighted random number generation
 * \param Key The Key type for this version of 'get' method
 *        Type should be '(THIS_TYPE)::weight' struct
 * \param map_container A container that has mapped_type,
 *        value_type and key_type defined; every mapped value
 *        is the weight of its element and must not be of a
 *        signed type
 * \return Iterator to the chosen element. Elements with a zero
 *         weight are never returned
 * \note Returns the end iterator if the container is empty
 *       or the total weight is equal to zero
 * \note The total weight is accumulated in mapped_type, so the
 *       sum of all weights has to fit into that type
 */
template<
    typename Key,
    class MapContainer
>
static auto get(const MapContainer& map_container) -> typename std::enable_if<
    details::is_map<MapContainer>::value &&
    details::is_iterator<decltype(std::begin(map_container))>::value &&
    !std::is_signed<typename MapContainer::mapped_type>::value &&
    std::is_same<Key, details::weight>::value,
    decltype(std::begin(map_container))>::type {
    using MappedType = typename MapContainer::mapped_type;
    using IteratorType = decltype(std::begin(map_container));

    const IteratorType last = std::end(map_container);

    // First pass: total of all weights
    MappedType total_weight = 0;
    for (IteratorType it = std::begin(map_container); it != last; ++it) {
        total_weight += it->second;
    }
    if (total_weight == MappedType(0)) return last;

    // 'total_weight - 1' is promoted to int when MappedType is narrower
    // than int; cast it back so both bounds keep the same type and the
    // call resolves exactly as it does for the wider weight types
    const MappedType random_weight =
        get(MappedType(0), static_cast<MappedType>(total_weight - 1));

    // Second pass: return the first element whose running total of
    // weights exceeds the drawn value
    MappedType sum = 0;
    for (IteratorType it = std::begin(map_container); it != last; ++it) {
        sum += it->second;
        if (sum > random_weight) return it;
    }
    return last;
}

/**
* \brief Return a random iterator from given map container by
* utilizing the values of the map container as weights
* for weighted random number generation
* \param Key The Key type for this version of 'get' method
* Type should be '(THIS_TYPE)::weight' struct
* \param map_container A container that has mapped_type,
* value_type and key_type defined
* \note Returns the end iterator if the container is empty or the total weight is equal to zero
*/
template<
typename Key,
class MapContainer
>
static auto get(const MapContainer& map_container) -> typename std::enable_if<
details::is_map<MapContainer>::value &&
details::is_iterator<decltype(std::begin(map_container))>::value &&
details::is_uniform_real<typename MapContainer::mapped_type>::value &&
std::is_same<Key, details::weight>::value,
decltype(std::begin(map_container))>::type {
using MappedType = typename MapContainer::mapped_type;
using IteratorType = decltype(std::begin(map_container));

MappedType total_weight = 0;
for (IteratorType it = std::begin(map_container); it != std::end(map_container); ++it) {
assert(it->second >= MappedType(0));
total_weight += it->second;
}
if(total_weight == MappedType(0)) return std::end(map_container);

MappedType random_weight = get(MappedType(0), std::nextafter(total_weight, (std::numeric_limits<MappedType>::min)( )));
MappedType sum = 0;

for(IteratorType it = std::begin(map_container); it != std::end(map_container); ++it)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By iterating through the map, we lose the reason for using a map instead of a vector<pair<Key, Value>>.

And for an unordered map the order of the weights will be different, so the result of this function will be different.

I'm not an expert in this algorithm; can it be done with the map::upper_bound or map::lower_bound function
https://en.cppreference.com/w/cpp/container/map/upper_bound
for logarithmic complexity?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The upper_bound and lower_bound functions operate on the keys of the map, so they are not suitable for this case. And even if they operated on values, we would have to store the cumulative weight up to that object as the value, instead of the weight of the object itself.

Also, the "ordering" of std::unordered_map should not affect the result, because the probability of each item getting picked is directly tied to its weight.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine, can you code a simple test case for this inside test/random_test.cpp file ?

I can do it by myself but later next week

Copy link
Contributor Author

@DolphyWind DolphyWind Jul 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

During testing, I found out that the algorithm fails for floats and doubles as values. I am also fixing that. Also, do you have specific test case(s) that you want me to code?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have nothing special, you can try your best

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All done. But I have one question: should I check whether there is a negative weight using assert?

It's much better to not allow users to use negative values at all by disallowing it with extra condition for enable_if

Something like this:
!std::is_signed<MapContainer::value_type >::value
And compilation should fail with signed map value types, so users are required to use only unsigned types

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For real numbers (float, double, long double) assert is ok

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But this completely disallows users to use reals as weights.

Copy link
Owner

@ilqvya ilqvya Jul 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

std::is_unsigned<MapContainer::value_type>::value || is_uniform_real<MapContainer::value_type>::value
...
assert(std::is_unsigned<MapContainer::value_type>::value || map_has_all_positive_real_numbers())

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

{
sum += it->second;
if(sum > random_weight) return it;
}
return std::end(map_container);
}

/**
* \brief Reorders the elements in the given range [first, last)
* such that each possible permutation of those elements
Expand Down Expand Up @@ -1018,11 +1114,11 @@ namespace effolkronium {
}

/**
* \brief Return random value from initilizer_list
* \param init_list initilizer_list with values
* \return Random value from initilizer_list
* \note Should be 1 or more elements in initilizer_list
* \note Warning! Elements in initilizer_list can't be moved:
* \brief Return random value from initializer_list
* \param init_list initializer_list with values
* \return Random value from initializer_list
* \note Should be 1 or more elements in initializer_list
* \note Warning! Elements in initializer_list can't be moved:
* https://stackoverflow.com/a/8193157/5734836
*/
template<typename T>
Expand Down Expand Up @@ -1240,6 +1336,87 @@ namespace effolkronium {
return dist( m_engine );
}

/**
 * \brief Return a random iterator from given map container by
 *        utilizing the values of the map container as weights
 *        for weighted random number generation
 * \param Key The Key type for this version of 'get' method
 *        Type should be '(THIS_TYPE)::weight' struct
 * \param map_container A container that has mapped_type,
 *        value_type and key_type defined; every mapped value
 *        is the weight of its element and must not be of a
 *        signed type
 * \return Iterator to the chosen element. Elements with a zero
 *         weight are never returned
 * \note Returns the end iterator if the container is empty
 *       or the total weight is equal to zero
 * \note The total weight is accumulated in mapped_type, so the
 *       sum of all weights has to fit into that type
 */
template<
    typename Key,
    class MapContainer
>
auto get(const MapContainer& map_container) -> typename std::enable_if<
    details::is_map<MapContainer>::value &&
    details::is_iterator<decltype(std::begin(map_container))>::value &&
    !std::is_signed<typename MapContainer::mapped_type>::value &&
    std::is_same<Key, details::weight>::value,
    decltype(std::begin(map_container))>::type {
    using MappedType = typename MapContainer::mapped_type;
    using IteratorType = decltype(std::begin(map_container));

    const IteratorType last = std::end(map_container);

    // First pass: total of all weights
    MappedType total_weight = 0;
    for (IteratorType it = std::begin(map_container); it != last; ++it) {
        total_weight += it->second;
    }
    if (total_weight == MappedType(0)) return last;

    // 'total_weight - 1' is promoted to int when MappedType is narrower
    // than int; cast it back so both bounds keep the same type and the
    // call resolves exactly as it does for the wider weight types
    const MappedType random_weight =
        get(MappedType(0), static_cast<MappedType>(total_weight - 1));

    // Second pass: return the first element whose running total of
    // weights exceeds the drawn value
    MappedType sum = 0;
    for (IteratorType it = std::begin(map_container); it != last; ++it) {
        sum += it->second;
        if (sum > random_weight) return it;
    }
    return last;
}

/**
 * \brief Return a random iterator from given map container by
 *        utilizing the values of the map container as weights
 *        for weighted random number generation
 * \param Key The Key type for this version of 'get' method
 *        Type should be '(THIS_TYPE)::weight' struct
 * \param map_container A container that has mapped_type,
 *        value_type and key_type defined; every mapped value
 *        is the floating point weight of its element
 * \return Iterator to the chosen element
 * \note Weights must not be negative (checked with assert)
 * \note Returns the end iterator if the container is empty
 *       or the total weight is equal to zero
 */
template<
    typename Key,
    class MapContainer
>
auto get(const MapContainer& map_container) -> typename std::enable_if<
    details::is_map<MapContainer>::value &&
    details::is_iterator<decltype(std::begin(map_container))>::value &&
    details::is_uniform_real<typename MapContainer::mapped_type>::value &&
    std::is_same<Key, details::weight>::value,
    decltype(std::begin(map_container))>::type {
    using MappedType = typename MapContainer::mapped_type;
    using IteratorType = decltype(std::begin(map_container));

    const IteratorType last = std::end(map_container);

    // First pass: validate the weights and add them up
    MappedType total_weight = 0;
    for (IteratorType it = std::begin(map_container); it != last; ++it) {
        assert(it->second >= MappedType(0));
        total_weight += it->second;
    }
    if (total_weight == MappedType(0)) return last;

    // Upper bound is the representable value just below the total.
    // Zero is used as the nextafter direction: the total is positive
    // here, so this always steps downward. numeric_limits::min() (the
    // smallest positive normal value) would step upward for a subnormal
    // total and not step at all for a total equal to it.
    const MappedType random_weight =
        get(MappedType(0), std::nextafter(total_weight, MappedType(0)));

    // Second pass: return the first element whose running total of
    // weights exceeds the drawn value
    MappedType sum = 0;
    for (IteratorType it = std::begin(map_container); it != last; ++it) {
        sum += it->second;
        if (sum > random_weight) return it;
    }
    return last;
}

/**
* \brief Reorders the elements in the given range [first, last)
* such that each possible permutation of those elements
Expand Down Expand Up @@ -1285,7 +1462,7 @@ namespace effolkronium {
/**
* \brief The basic static random alias based on a std::mt19937
* \note It uses static methods API and data with static storage
* \note Not thread safe but more prefomance
* \note Not thread safe but more performance
*/
using random_static = basic_random_static<std::mt19937>;

Expand Down
85 changes: 85 additions & 0 deletions test/random_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include <array>
#include <thread>
#include <vector>
#include <map>
#include <unordered_map>

#ifdef _WIN32 // Unit test for case when builds fail 'cause of min\max macro included from Windows.h before random.hpp
#include "Windows.h"
Expand Down Expand Up @@ -992,3 +994,86 @@ TEST_CASE("Construct Seeder only once") {
REQUIRE(constructCount == 2);
REQUIRE(invokeCount == 2);
}

TEST_CASE("is_map trait"){
    using effolkronium::details::is_map;

    // Both standard map flavours declare key_type, mapped_type and
    // value_type, so they are detected
    static_assert(is_map<std::map<int ,int>>::value, "");
    static_assert(is_map<std::unordered_map<int ,int>>::value, "");

    // Detection is structural: any type declaring the three nested
    // types counts as a map as well
    struct MapLike{
        typedef int key_type;
        typedef int mapped_type;
        typedef int value_type;
    };
    static_assert(is_map<MapLike>::value, "");

    // Types without those nested types must be rejected
    static_assert(!is_map<std::string>::value, "");
    static_assert(!is_map<std::vector<int>>::value, "");
    static_assert(!is_map<int>::value, "");
}

TEST_CASE("Empty map and maps with zero total weight tests"){
    // Every weight type is exercised with a hashed and an ordered map
    using ULongHash  = std::unordered_map<std::string, unsigned long>;
    using UIntHash   = std::unordered_map<std::string, unsigned>;
    using FloatHash  = std::unordered_map<std::string, float>;
    using DoubleHash = std::unordered_map<std::string, double>;
    using ULongTree  = std::map<std::string, unsigned long>;
    using UIntTree   = std::map<std::string, unsigned>;
    using FloatTree  = std::map<std::string, float>;
    using DoubleTree = std::map<std::string, double>;

    // Nothing can be picked from a container without elements,
    // so the end iterator is expected
    ULongHash  no_items_ulong_hash;
    UIntHash   no_items_uint_hash;
    FloatHash  no_items_float_hash;
    DoubleHash no_items_double_hash;

    ULongTree  no_items_ulong_tree;
    UIntTree   no_items_uint_tree;
    FloatTree  no_items_float_tree;
    DoubleTree no_items_double_tree;

    REQUIRE(Random DOT get<Random_t::weight>(no_items_ulong_hash) == no_items_ulong_hash.end());
    REQUIRE(Random DOT get<Random_t::weight>(no_items_uint_hash) == no_items_uint_hash.end());
    REQUIRE(Random DOT get<Random_t::weight>(no_items_float_hash) == no_items_float_hash.end());
    REQUIRE(Random DOT get<Random_t::weight>(no_items_double_hash) == no_items_double_hash.end());

    REQUIRE(Random DOT get<Random_t::weight>(no_items_ulong_tree) == no_items_ulong_tree.end());
    REQUIRE(Random DOT get<Random_t::weight>(no_items_uint_tree) == no_items_uint_tree.end());
    REQUIRE(Random DOT get<Random_t::weight>(no_items_float_tree) == no_items_float_tree.end());
    REQUIRE(Random DOT get<Random_t::weight>(no_items_double_tree) == no_items_double_tree.end());

    // Elements are present but all weights are zero: the total weight
    // is zero, so the end iterator is expected again
    ULongHash  all_zero_ulong_hash{{"Orange", 0ul}, {"Apple", 0ul}, {"Banana", 0ul}};
    UIntHash   all_zero_uint_hash{{"Orange", 0u}, {"Apple", 0u}, {"Banana", 0u}};
    FloatHash  all_zero_float_hash{{"Orange", 0.0f}, {"Apple", 0.0f}, {"Banana", 0.0f}};
    DoubleHash all_zero_double_hash{{"Orange", 0.0}, {"Apple", 0.0}, {"Banana", 0.0}};

    ULongTree  all_zero_ulong_tree{{"Orange", 0ul}, {"Apple", 0ul}, {"Banana", 0ul}};
    UIntTree   all_zero_uint_tree{{"Orange", 0u}, {"Apple", 0u}, {"Banana", 0u}};
    FloatTree  all_zero_float_tree{{"Orange", 0.0f}, {"Apple", 0.0f}, {"Banana", 0.0f}};
    DoubleTree all_zero_double_tree{{"Orange", 0.0}, {"Apple", 0.0}, {"Banana", 0.0}};

    REQUIRE(Random DOT get<Random_t::weight>(all_zero_ulong_hash) == all_zero_ulong_hash.end());
    REQUIRE(Random DOT get<Random_t::weight>(all_zero_uint_hash) == all_zero_uint_hash.end());
    REQUIRE(Random DOT get<Random_t::weight>(all_zero_float_hash) == all_zero_float_hash.end());
    REQUIRE(Random DOT get<Random_t::weight>(all_zero_double_hash) == all_zero_double_hash.end());

    REQUIRE(Random DOT get<Random_t::weight>(all_zero_ulong_tree) == all_zero_ulong_tree.end());
    REQUIRE(Random DOT get<Random_t::weight>(all_zero_uint_tree) == all_zero_uint_tree.end());
    REQUIRE(Random DOT get<Random_t::weight>(all_zero_float_tree) == all_zero_float_tree.end());
    REQUIRE(Random DOT get<Random_t::weight>(all_zero_double_tree) == all_zero_double_tree.end());
}

TEST_CASE("Maps with non-zero total weight tests"){
    std::unordered_map<std::string, unsigned long> nonzero_ulong_umap = {{"Orange", 1ul}, {"Apple", 2ul}, {"Banana", 3ul}};
    std::unordered_map<std::string, unsigned> nonzero_uint_umap = {{"Orange", 1u}, {"Apple", 2u}, {"Banana", 3u}};
    std::unordered_map<std::string, float> nonzero_float_umap = {{"Orange", 1.0f}, {"Apple", 2.0f}, {"Banana", 3.0f}};
    std::unordered_map<std::string, double> nonzero_double_umap = {{"Orange", 1.0}, {"Apple", 2.0}, {"Banana", 3.0}};

    std::map<std::string, unsigned long> nonzero_ulong_map = {{"Orange", 1ul}, {"Apple", 2ul}, {"Banana", 3ul}};
    std::map<std::string, unsigned> nonzero_uint_map = {{"Orange", 1u}, {"Apple", 2u}, {"Banana", 3u}};
    std::map<std::string, float> nonzero_float_map = {{"Orange", 1.0f}, {"Apple", 2.0f}, {"Banana", 3.0f}};
    std::map<std::string, double> nonzero_double_map = {{"Orange", 1.0}, {"Apple", 2.0}, {"Banana", 3.0}};

    // A positive total weight means some element has to be returned
    REQUIRE(Random DOT get<Random_t::weight>(nonzero_ulong_umap) != nonzero_ulong_umap.end());
    REQUIRE(Random DOT get<Random_t::weight>(nonzero_uint_umap) != nonzero_uint_umap.end());
    REQUIRE(Random DOT get<Random_t::weight>(nonzero_float_umap) != nonzero_float_umap.end());
    REQUIRE(Random DOT get<Random_t::weight>(nonzero_double_umap) != nonzero_double_umap.end());

    REQUIRE(Random DOT get<Random_t::weight>(nonzero_ulong_map) != nonzero_ulong_map.end());
    REQUIRE(Random DOT get<Random_t::weight>(nonzero_uint_map) != nonzero_uint_map.end());
    REQUIRE(Random DOT get<Random_t::weight>(nonzero_float_map) != nonzero_float_map.end());
    REQUIRE(Random DOT get<Random_t::weight>(nonzero_double_map) != nonzero_double_map.end());

    // The checks above only prove that *some* element comes back. With
    // unsigned weights an element whose weight is zero can never be
    // selected, so a map with exactly one non-zero weight has a single
    // possible result regardless of what the engine produces and of the
    // iteration order of the container.
    std::unordered_map<std::string, unsigned> single_choice_umap = {{"Never", 0u}, {"Always", 5u}, {"Nope", 0u}};
    std::map<std::string, unsigned> single_choice_map = {{"Never", 0u}, {"Always", 5u}, {"Nope", 0u}};

    for (int attempt = 0; attempt < 32; ++attempt) {
        REQUIRE(Random DOT get<Random_t::weight>(single_choice_umap)->first == "Always");
        REQUIRE(Random DOT get<Random_t::weight>(single_choice_map)->first == "Always");
    }
}