Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge algorithm #228

Merged
merged 1 commit into from
Aug 12, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions include/boost/compute/algorithm/detail/merge_path.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://kylelutz.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP
#define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP

#include <iterator>

#include <boost/compute/algorithm/find_if.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/detail/iterator_range_size.hpp>
#include <boost/compute/detail/meta_kernel.hpp>
#include <boost/compute/lambda.hpp>
#include <boost/compute/system.hpp>

namespace boost {
namespace compute {
namespace detail {

///
/// \brief Merge Path kernel class
///
/// Subclass of meta_kernel that partitions two input ranges into tiles
/// along their merge path.
///
/// Work-item \c i binary-searches for how many of the first
/// <tt>(i+1)*tile_size</tt> merged elements come from the first range.
/// That number is written to <tt>result_a[i]</tt>, and the remainder
/// (elements taken from the second range) to <tt>result_b[i]</tt>.
/// Ties are resolved in favour of the first range (the comparison is
/// <tt>a <= b</tt>).
///
/// NOTE(review): the binary search presumes both input ranges are sorted
/// in ascending order -- confirm against callers.
///
template<class InputIterator1, class InputIterator2,
         class OutputIterator1, class OutputIterator2>
class merge_path_kernel : public meta_kernel
{
public:
    /// Number of merged output elements covered by one tile.
    ///
    /// The value is baked into the kernel source by set_range() and is
    /// read again by exec() to compute the global work size, so it must
    /// be non-zero, set before set_range(), and left unchanged until
    /// after exec().
    unsigned int tile_size;

    /// Creates the kernel with a default tile size of 4. All bookkeeping
    /// members are zero-initialized so that calling exec() before
    /// set_range() is a harmless no-op rather than a read of
    /// indeterminate values.
    merge_path_kernel()
        : meta_kernel("merge_path"),
          tile_size(4),
          m_a_count(0),
          m_a_count_arg(0),
          m_b_count(0),
          m_b_count_arg(0)
    {
    }

    /// Generates the kernel source for the given ranges.
    ///
    /// \param first1 Iterator pointing to start of first set
    /// \param last1 Iterator pointing to end of first set
    /// \param first2 Iterator pointing to start of second set
    /// \param last2 Iterator pointing to end of second set
    /// \param result_a Receives, per tile boundary, the number of
    ///        elements consumed from the first set
    /// \param result_b Receives, per tile boundary, the number of
    ///        elements consumed from the second set
    void set_range(InputIterator1 first1,
                   InputIterator1 last1,
                   InputIterator2 first2,
                   InputIterator2 last2,
                   OutputIterator1 result_a,
                   OutputIterator2 result_b)
    {
        // The sizes are passed as kernel arguments (bound in exec()).
        m_a_count = iterator_range_size(first1, last1);
        m_a_count_arg = add_arg<uint_>("a_count");

        m_b_count = iterator_range_size(first2, last2);
        m_b_count_arg = add_arg<uint_>("b_count");

        *this <<
            "uint i = get_global_id(0);\n" <<
            // Number of merged elements that precede this tile boundary.
            "uint target = (i+1)*" << tile_size << ";\n" <<
            // Search window for the count taken from the first range:
            // at least target - b_count (clamped at 0), at most
            // min(target, a_count).
            "uint start = max(convert_int(0),convert_int(target)-convert_int(b_count));\n" <<
            "uint end = min(target,a_count);\n" <<
            "uint a_index, b_index;\n" <<
            "while(start<end)\n" <<
            "{\n" <<
            " a_index = (start + end)/2;\n" <<
            " b_index = target - a_index - 1;\n" <<
            " if(" << first1[expr<uint_>("a_index")] <<
            " <=" << first2[expr<uint_>("b_index")] << ")\n" <<
            " start = a_index + 1;\n" <<
            " else end = a_index;\n" <<
            "}\n" <<
            result_a[expr<uint_>("i")] << " = start;\n" <<
            result_b[expr<uint_>("i")] << " = target - start;\n";
    }

    /// Launches one work-item per complete tile, i.e.
    /// <tt>(a_count + b_count) / tile_size</tt> work-items.
    ///
    /// \param queue Queue on which to execute
    /// \return the event of the launched kernel, or a default-constructed
    ///         event when there is no complete tile to process
    event exec(command_queue &queue)
    {
        const size_t complete_tiles = (m_a_count + m_b_count) / tile_size;
        if(complete_tiles == 0) {
            return event();
        }

        set_arg(m_a_count_arg, uint_(m_a_count));
        set_arg(m_b_count_arg, uint_(m_b_count));

        return exec_1d(queue, 0, complete_tiles);
    }

private:
    size_t m_a_count;     // number of elements in the first range
    size_t m_a_count_arg; // kernel argument index of "a_count"
    size_t m_b_count;     // number of elements in the second range
    size_t m_b_count_arg; // kernel argument index of "b_count"
};

} //end detail namespace
} //end compute namespace
} //end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP
176 changes: 176 additions & 0 deletions include/boost/compute/algorithm/detail/merge_with_merge_path.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://kylelutz.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP
#define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP

#include <iterator>

#include <boost/compute/algorithm/detail/merge_path.hpp>
#include <boost/compute/algorithm/fill_n.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/detail/iterator_range_size.hpp>
#include <boost/compute/detail/meta_kernel.hpp>
#include <boost/compute/system.hpp>

namespace boost {
namespace compute {
namespace detail {

///
/// \brief Serial merge kernel class
///
/// Subclass of meta_kernel to perform serial merge after tiling.
///
/// Work-item \c i merges the sub-ranges
/// <tt>[tile_first1[i], tile_first1[i+1])</tt> of the first input and
/// <tt>[tile_first2[i], tile_first2[i+1])</tt> of the second input,
/// writing the output starting at <tt>result[i*tile_size]</tt>. On ties
/// the element from the first input is emitted first (the comparison is
/// <tt>a <= b</tt>).
///
template<class InputIterator1, class InputIterator2,
         class InputIterator3, class InputIterator4,
         class OutputIterator>
class serial_merge_kernel : public meta_kernel
{
public:
    /// Number of output elements produced per tile. The value is baked
    /// into the kernel source by set_range(), so it must be set before
    /// set_range() is called.
    unsigned int tile_size;

    /// Creates the kernel with a default tile size of 4 and no tiles to
    /// process, so exec() before set_range() is a no-op.
    serial_merge_kernel()
        : meta_kernel("merge"),
          tile_size(4),
          m_count(0)
    {
    }

    /// Generates the kernel source for the given ranges.
    ///
    /// \param first1 Iterator pointing to start of first set
    /// \param first2 Iterator pointing to start of second set
    /// \param tile_first1 Start of the tile boundaries into the first set
    /// \param tile_last1 End of the tile boundaries into the first set
    /// \param tile_first2 Start of the tile boundaries into the second set
    /// \param result Iterator pointing to start of the output range
    void set_range(InputIterator1 first1,
                   InputIterator2 first2,
                   InputIterator3 tile_first1,
                   InputIterator3 tile_last1,
                   InputIterator4 tile_first2,
                   OutputIterator result)
    {
        // N boundaries delimit N-1 tiles. Guard the subtraction so that
        // an empty boundary range yields zero tiles instead of wrapping
        // around to a huge unsigned work size.
        const size_t boundary_count =
            iterator_range_size(tile_first1, tile_last1);
        m_count = boundary_count > 0 ? boundary_count - 1 : 0;

        *this <<
            "uint i = get_global_id(0);\n" <<
            "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" <<
            "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" <<
            "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" <<
            "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" <<
            "uint index = i*" << tile_size << ";\n" <<
            // Standard two-way merge while both sub-ranges have elements.
            "while(start1<end1 && start2<end2)\n" <<
            "{\n" <<
            " if(" << first1[expr<uint_>("start1")] << " <= " <<
            first2[expr<uint_>("start2")] << ")\n" <<
            " {\n" <<
            result[expr<uint_>("index")] <<
            " = " << first1[expr<uint_>("start1")] << ";\n" <<
            " index++;\n" <<
            " start1++;\n" <<
            " }\n" <<
            " else\n" <<
            " {\n" <<
            result[expr<uint_>("index")] <<
            " = " << first2[expr<uint_>("start2")] << ";\n" <<
            " index++;\n" <<
            " start2++;\n" <<
            " }\n" <<
            "}\n" <<
            // Drain whatever is left of the first sub-range...
            "while(start1<end1)\n" <<
            "{\n" <<
            result[expr<uint_>("index")] <<
            " = " << first1[expr<uint_>("start1")] << ";\n" <<
            " index++;\n" <<
            " start1++;\n" <<
            "}\n" <<
            // ...and of the second sub-range.
            "while(start2<end2)\n" <<
            "{\n" <<
            result[expr<uint_>("index")] <<
            " = " << first2[expr<uint_>("start2")] << ";\n" <<
            " index++;\n" <<
            " start2++;\n" <<
            "}\n";
    }

    /// Launches one work-item per tile.
    ///
    /// \param queue Queue on which to execute
    /// \return the event of the launched kernel, or a default-constructed
    ///         event when there are no tiles to merge
    event exec(command_queue &queue)
    {
        if(m_count == 0) {
            return event();
        }

        return exec_1d(queue, 0, m_count);
    }

private:
    size_t m_count; // number of tiles (boundary count minus one)
};

///
/// \brief Merge algorithm with merge path
///
/// Merges the sorted values in the range [\p first1, \p last1) with
/// the sorted values in the range [\p first2, last2) and stores the
/// result in the range beginning at \p result
///
/// \param first1 Iterator pointing to start of first set
/// \param last1 Iterator pointing to end of first set
/// \param first2 Iterator pointing to start of second set
/// \param last2 Iterator pointing to end of second set
/// \param result Iterator pointing to start of range in which the result
/// will be stored
/// \param queue Queue on which to execute
///
template<class InputIterator1, class InputIterator2, class OutputIterator>
inline OutputIterator
merge_with_merge_path(InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
InputIterator2 last2,
OutputIterator result,
command_queue &queue = system::default_queue())
{
typedef typename std::iterator_traits<InputIterator1>::value_type value_type;

int tile_size = 1024;

int count1 = iterator_range_size(first1, last1);
int count2 = iterator_range_size(first2, last2);

vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());

// Tile the sets
merge_path_kernel<InputIterator1,
InputIterator2,
vector<uint_>::iterator,
vector<uint_>::iterator> tiling_kernel;
tiling_kernel.tile_size = 1024;
tiling_kernel.set_range(first1, last1, first2, last2,
tile_a.begin()+1, tile_b.begin()+1);
fill_n(tile_a.begin(), 1, 0, queue);
fill_n(tile_b.begin(), 1, 0, queue);
tiling_kernel.exec(queue);

fill_n(tile_a.end()-1, 1, count1, queue);
fill_n(tile_b.end()-1, 1, count2, queue);

// Merge
serial_merge_kernel<InputIterator1,
InputIterator2,
vector<uint_>::iterator,
vector<uint_>::iterator,
OutputIterator> merge_kernel;
merge_kernel.tile_size = 1024;
merge_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(),
tile_b.begin(), result);

merge_kernel.exec(queue);

return result + count1 + count2;
}

} //end detail namespace
} //end compute namespace
} //end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP
5 changes: 2 additions & 3 deletions include/boost/compute/algorithm/merge.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <boost/compute/system.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/copy.hpp>
#include <boost/compute/algorithm/detail/merge_with_merge_path.hpp>
#include <boost/compute/algorithm/detail/serial_merge.hpp>

namespace boost {
Expand All @@ -30,9 +31,7 @@ inline OutputIterator merge(InputIterator1 first1,
OutputIterator result,
command_queue &queue = system::default_queue())
{
typedef typename std::iterator_traits<InputIterator1>::value_type T1;

return merge(first1, last1, first2, last2, result, less<T1>(), queue);
return detail::merge_with_merge_path(first1, last1, first2, last2, result, queue);
}

/// Merges the sorted values in the range [\p first1, \p last1) with
Expand Down