130 changes: 130 additions & 0 deletions include/boost/sort/spreadsort/integer_sort.hpp
Expand Up @@ -24,6 +24,8 @@ Doxygen comments by Paul A. Bristow Jan 2015
#include <boost/static_assert.hpp>
#include <boost/sort/spreadsort/detail/constants.hpp>
#include <boost/sort/spreadsort/detail/integer_sort.hpp>
#include <boost/range/begin.hpp>
#include <boost/range/end.hpp>

namespace boost {
namespace sort {
Expand Down Expand Up @@ -80,6 +82,46 @@ Some performance plots of runtime vs. n and log(range) are provided:\n
detail::integer_sort(first, last, *first >> 0);
}

/*! \brief Integer sort algorithm using range.
(All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
\details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
so @c integer_sort is asymptotically faster
than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
so its worst-case with default settings for 32-bit integers is
<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
Some performance plots of runtime vs. n and log(range) are provided:\n
<a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
\n
<a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
\param[in] range Range [first, last) for sorting.
\pre [@c first, @c last) is a valid range.
\post The elements in the range [@c first, @c last) are sorted in ascending order.
\throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
the right shift, subtraction of right-shifted elements, functors, or any operations on iterators throw.
\warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
\warning Invalid arguments cause undefined behaviour.
\note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
enabling faster generic-programming.
\remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
\remark * N is @c last - @c first,
\remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
\remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
*/
template <class Range>
inline void integer_sort(Range& range)
{
integer_sort(boost::begin(range), boost::end(range));
}

/*! \brief Integer sort algorithm using random access iterators with both right-shift and user-defined comparison operator.
(All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
Expand Down Expand Up @@ -129,6 +171,50 @@ Some performance plots of runtime vs. n and log(range) are provided:\n
detail::integer_sort(first, last, shift(*first, 0), shift, comp);
}

/*! \brief Integer sort algorithm using range with both right-shift and user-defined comparison operator.
(All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
\details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
so @c integer_sort is asymptotically faster
than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
so its worst-case with default settings for 32-bit integers is
<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
Some performance plots of runtime vs. n and log(range) are provided:\n
<a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
\n
<a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
\param[in] range Range [first, last) for sorting.
\param[in] shift Functor that returns the result of shifting the value_type right a specified number of bits.
\param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
\pre [@c first, @c last) is a valid range.
\post The elements in the range [@c first, @c last) are sorted in ascending order.
\return @c void.
\throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
the right shift, subtraction of right-shifted elements, functors,
or any operations on iterators throw.
\warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
\warning Invalid arguments cause undefined behaviour.
\note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
enabling faster generic-programming.
\remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
\remark * N is @c last - @c first,
\remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
\remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
*/
template <class Range, class Right_shift, class Compare>
inline void integer_sort(Range& range, Right_shift shift, Compare comp)
{
integer_sort(boost::begin(range), boost::end(range), shift, comp);
}

/*! \brief Integer sort algorithm using random access iterators with just right-shift functor.
(All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
Expand Down Expand Up @@ -177,6 +263,50 @@ Some performance plots of runtime vs. n and log(range) are provided:\n
else
detail::integer_sort(first, last, shift(*first, 0), shift);
}


/*! \brief Integer sort algorithm using range with just right-shift functor.
(All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
\details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
\par Performance:
Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
so @c integer_sort is asymptotically faster
than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
so its worst-case with default settings for 32-bit integers is
<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
Some performance plots of runtime vs. n and log(range) are provided:\n
* <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>\n
* <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
\param[in] range Range [first, last) for sorting.
\param[in] shift A functor that returns the result of shifting the value_type right a specified number of bits.
\pre [@c first, @c last) is a valid range.
\post The elements in the range [@c first, @c last) are sorted in ascending order.
\throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
the right shift, subtraction of right-shifted elements, functors,
or any operations on iterators throw.
\warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
\warning Invalid arguments cause undefined behaviour.
\note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
enabling faster generic-programming.
\remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
\remark * N is @c last - @c first,
\remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
\remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
*/
template <class Range, class Right_shift>
inline void integer_sort(Range& range, Right_shift shift)
{
integer_sort(boost::begin(range), boost::end(range), shift);
}
}
}
}
Expand Down
23 changes: 23 additions & 0 deletions include/boost/sort/spreadsort/spreadsort.hpp
Expand Up @@ -12,6 +12,8 @@ Some improvements suggested by:
Phil Endecott and Frank Gennari
float_mem_cast fix provided by:
Scott McMurray
Range support provided by:
Alexander Zaitsev
*/

#ifndef BOOST_SORT_SPREADSORT_HPP
Expand All @@ -25,6 +27,8 @@ Scott McMurray
#include <boost/sort/spreadsort/integer_sort.hpp>
#include <boost/sort/spreadsort/float_sort.hpp>
#include <boost/sort/spreadsort/string_sort.hpp>
#include <boost/range/begin.hpp>
#include <boost/range/end.hpp>

namespace boost {
namespace sort {
Expand Down Expand Up @@ -139,6 +143,25 @@ namespace spreadsort {
boost::uint16_t unused = 0;
string_sort(first, last, unused);
}

/*!
\brief Generic @c spreadsort variant detects value_type and calls required sort function.
\note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly,
as @c spreadsort won't accept types that don't have the appropriate @c type_traits.
\param[in] range Range [first, last) for sorting.
\pre [@c first, @c last) is a valid range.
\post The elements in the range [@c first, @c last) are sorted in ascending order.
*/

template <class Range>
void spreadsort(Range& range)
{
spreadsort(boost::begin(range), boost::end(range));
}


} // namespace spreadsort
} // namespace sort
} // namespace boost
Expand Down
388 changes: 340 additions & 48 deletions include/boost/sort/spreadsort/string_sort.hpp

Large diffs are not rendered by default.

848 changes: 848 additions & 0 deletions include/boost/sort/timsort.hpp

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion test/Jamfile.v2
Expand Up @@ -19,7 +19,8 @@ import testing ;
: : : : string_sort ]
[ run sort_detail_test.cpp
: : : : sort_detail ]

[ run timsort_test.cpp
: : : : timsort ]

;
}
126 changes: 126 additions & 0 deletions test/timsort_test.cpp
@@ -0,0 +1,126 @@
/*
Copyright (c) Alexander Zaitsev <zamazan4ik@gmail.com>, 2016
Distributed under the Boost Software License, Version 1.0. (See
accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
See http://www.boost.org/ for latest version.
*/

#include <boost/sort/timsort.hpp>
// Include unit test framework
#include <boost/test/included/test_exec_monitor.hpp>
#include <boost/test/test_tools.hpp>
#include <vector>
#include <cstdlib>
#include <algorithm>

using namespace std;
using namespace boost::sort;

struct A
{
int a;
int b;

A(int val_a, int val_b) : a(val_a), b(val_b) {}

bool operator<(const A& val) const
{
return a < val.a;
}

bool operator==(const A& val) const
{
return a == val.a && b == val.b;
}
};


boost::int32_t
rand_32(bool sign = true) {
boost::int32_t result = rand() | (rand()<< 16);
if (rand() % 2)
result |= 1 << 15;
//Adding the sign bit
if (sign && (rand() % 2))
result *= -1;
return result;
}

void stable_test()
{
vector<A> base_vec;
for(size_t i = 0; i < 1000; ++i)
{
base_vec.push_back(A(rand() % 6, rand() % 11));
}

vector<A> test_vec = base_vec;
vector<A> sorted_vec = base_vec;
stable_sort(sorted_vec.begin(), sorted_vec.end());
timsort(test_vec);
BOOST_CHECK(test_vec == sorted_vec);
}

void random_test()
{
// Prepare inputs
vector<int> base_vec;
size_t count = 100000;
//Generating semirandom numbers
for (size_t u = 0; u < count; ++u)
{
base_vec.push_back(rand_32());
}
vector<int> sorted_vec = base_vec;
vector<int> test_vec = base_vec;
std::sort(sorted_vec.begin(), sorted_vec.end());
//Testing basic call
timsort(test_vec.begin(), test_vec.end());
BOOST_CHECK(test_vec == sorted_vec);
//Boost.Range variant
test_vec = base_vec;
timsort(test_vec);
BOOST_CHECK(test_vec == sorted_vec);
//Functor
test_vec = base_vec;
timsort(test_vec.begin(), test_vec.end(), less<int>());
BOOST_CHECK(test_vec == sorted_vec);
//reverse order
test_vec = base_vec;
sorted_vec = base_vec;
std::sort(test_vec.begin(), test_vec.end(), greater<int>());
std::sort(sorted_vec.begin(), sorted_vec.end());
timsort(test_vec.begin(), test_vec.end());
BOOST_CHECK(test_vec == sorted_vec);
}


template<typename T>
void corner_test(T val)
{
vector<T> test_vec;
boost::sort::timsort(test_vec.begin(), test_vec.end());
BOOST_CHECK(test_vec.size() == 0);
test_vec.push_back(val);
boost::sort::timsort(test_vec.begin(), test_vec.end());
BOOST_CHECK(test_vec.size() == 1);
BOOST_CHECK(test_vec[0] == val);
}


// test main
int test_main( int, char*[] )
{
srand(1);

random_test();
stable_test();

//Corner tests
corner_test<int>(42);
corner_test<double>(42.0);
corner_test<size_t>(0);
corner_test<A>(A(42, 322));
return 0;
}