Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions include/boost/compute/algorithm/adjacent_difference.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,7 @@ dispatch_adjacent_difference(InputIterator first,
BinaryFunction op,
command_queue &queue = system::default_queue())
{
if(first == last){
return result;
}

size_t count = detail::iterator_range_size(first, last);

detail::meta_kernel k("adjacent_difference");

k << "const uint i = get_global_id(0);\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ inline size_t bitonic_block_sort(KeyIterator keys_first,
size_t count_arg = k.add_arg<const uint_>("count");

size_t local_keys_arg = k.add_arg<key_type *>(memory_object::local_memory, "lkeys");
size_t local_vals_arg;
size_t local_vals_arg = 0;
if(sort_by_key) {
local_vals_arg = k.add_arg<uchar_ *>(memory_object::local_memory, "lidx");
}
Expand Down
105 changes: 105 additions & 0 deletions include/boost/compute/algorithm/detail/reduce_on_cpu.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_CPU_HPP
#define BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_CPU_HPP

#include <boost/compute/buffer.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/detail/meta_kernel.hpp>
#include <boost/compute/detail/iterator_range_size.hpp>
#include <boost/compute/detail/parameter_cache.hpp>
#include <boost/compute/iterator/buffer_iterator.hpp>
#include <boost/compute/type_traits/result_of.hpp>
#include <boost/compute/algorithm/detail/serial_reduce.hpp>

namespace boost {
namespace compute {
namespace detail {

// Reduces the range [first, last) with the binary function `function` and
// writes the single reduced value to `result`.
//
// Strategy:
//  * an empty range returns immediately and leaves `result` untouched;
//  * ranges shorter than the tunable "serial_reduce_threshold" parameter
//    (looked up in the per-device parameter cache, keyed by sizeof(T)) are
//    handed straight to serial_reduce();
//  * larger ranges are split into contiguous blocks, one per work-item.
//    Each work-item reduces its block sequentially and stores a partial
//    result; the partial results are then combined with serial_reduce().
//
// Parameters:
//  first, last - input range on the device
//  result      - output iterator receiving the reduced value
//  function    - binary reduction function, invoked as function(T, T)
//  queue       - command queue used to enqueue the kernel(s)
//
// NOTE(review): `count` is passed to the kernel as a 32-bit uint_, so ranges
// with 2^32 or more elements are not supported by this implementation.
template<class InputIterator, class OutputIterator, class BinaryFunction>
inline void reduce_on_cpu(InputIterator first,
                          InputIterator last,
                          OutputIterator result,
                          BinaryFunction function,
                          command_queue &queue)
{
    typedef typename
        std::iterator_traits<InputIterator>::value_type T;
    typedef typename
        ::boost::compute::result_of<BinaryFunction(T, T)>::type result_type;

    const device &device = queue.get_device();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    std::string cache_key =
        "__boost_reduce_cpu_" + boost::lexical_cast<std::string>(sizeof(T));

    // for inputs smaller than serial_reduce_threshold
    // serial_reduce algorithm is used
    uint_ serial_reduce_threshold =
        parameters->get(cache_key, "serial_reduce_threshold", 16384 * sizeof(T));

    const context &context = queue.get_context();
    size_t count = detail::iterator_range_size(first, last);
    if(count == 0){
        return;
    }
    else if(count < serial_reduce_threshold) {
        return serial_reduce(first, last, result, function, queue);
    }

    // assumes compute_units() >= 1 (OpenCL requires at least one compute
    // unit per device) -- TODO confirm for all supported platforms
    const size_t compute_units = device.compute_units();

    // Number of elements per work-item, rounded up (integer arithmetic).
    const size_t block_size =
        count / compute_units + ((count % compute_units) != 0 ? 1 : 0);

    // Launch only as many work-items as have at least one element to
    // process. With this choice the kernel-side ceil(count / global_size)
    // evaluates to block_size again, and every work-item's start index is
    // strictly less than count, so the unconditional first[index] read in
    // the kernel never goes out of range.
    const size_t global_work_size =
        count / block_size + ((count % block_size) != 0 ? 1 : 0);

    meta_kernel k("reduce_on_cpu");
    buffer output(context, sizeof(result_type) * global_work_size);

    size_t count_arg = k.add_arg<uint_>("count");
    size_t output_arg =
        k.add_arg<result_type *>(memory_object::global_memory, "output");

    k <<
        // Integer ceil-division: a float-based ceil() loses precision for
        // counts above 2^24 and could leave trailing elements unprocessed.
        "const uint gsize = (uint)get_global_size(0);\n" <<
        "uint block = count / gsize + ((count % gsize) != 0 ? 1 : 0);\n" <<
        "uint index = get_global_id(0) * block;\n" <<
        // index < count holds for every work-item (see host-side sizing),
        // so this form cannot wrap around the way index + block could.
        "uint end = index + min(block, count - index);\n" <<

        k.decl<result_type>("result") << " = " << first[k.var<uint_>("index")] << ";\n" <<
        "index++;\n" <<
        "while(index < end){\n" <<
            "result = " << function(k.var<T>("result"),
                                    first[k.var<uint_>("index")]) << ";\n" <<
            "index++;\n" <<
        "}\n" <<
        "output[get_global_id(0)] = result;\n";

    kernel kernel = k.compile(context);

    // reduction to global_work_size elements
    kernel.set_arg(count_arg, static_cast<uint_>(count));
    kernel.set_arg(output_arg, output);
    queue.enqueue_1d_range_kernel(kernel, 0, global_work_size, 0);

    // final reduction
    //
    // There are at most compute_units partial results, so they are combined
    // serially. Calling serial_reduce() directly (instead of recursing into
    // reduce_on_cpu) guarantees termination even when the tuned
    // serial_reduce_threshold is not larger than the number of partials.
    serial_reduce(
        make_buffer_iterator<result_type>(output),
        make_buffer_iterator<result_type>(output, global_work_size),
        result,
        function,
        queue
    );
}

} // end detail namespace
} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_CPU_HPP
14 changes: 7 additions & 7 deletions include/boost/compute/algorithm/reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include <boost/compute/algorithm/copy_n.hpp>
#include <boost/compute/algorithm/detail/inplace_reduce.hpp>
#include <boost/compute/algorithm/detail/reduce_on_gpu.hpp>
#include <boost/compute/algorithm/detail/serial_reduce.hpp>
#include <boost/compute/algorithm/detail/reduce_on_cpu.hpp>
#include <boost/compute/detail/iterator_range_size.hpp>
#include <boost/compute/memory/local_buffer.hpp>
#include <boost/compute/type_traits/result_of.hpp>
Expand Down Expand Up @@ -173,8 +173,8 @@ inline void generic_reduce(InputIterator first,
size_t count = detail::iterator_range_size(first, last);

if(device.type() & device::cpu){
boost::compute::vector<result_type> value(1, context);
detail::serial_reduce(first, last, value.begin(), function, queue);
array<result_type, 1> value(context);
detail::reduce_on_cpu(first, last, value.begin(), function, queue);
boost::compute::copy_n(value.begin(), 1, result, queue);
}
else {
Expand Down Expand Up @@ -209,16 +209,16 @@ inline void dispatch_reduce(InputIterator first,
const device &device = queue.get_device();

// reduce to temporary buffer on device
array<T, 1> tmp(context);
array<T, 1> value(context);
if(device.type() & device::cpu){
detail::serial_reduce(first, last, tmp.begin(), function, queue);
detail::reduce_on_cpu(first, last, value.begin(), function, queue);
}
else {
reduce_on_gpu(first, last, tmp.begin(), function, queue);
reduce_on_gpu(first, last, value.begin(), function, queue);
}

// copy to result iterator
copy_n(tmp.begin(), 1, result, queue);
copy_n(value.begin(), 1, result, queue);
}

template<class InputIterator, class OutputIterator, class BinaryFunction>
Expand Down
52 changes: 41 additions & 11 deletions test/test_adjacent_difference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,37 +28,67 @@ namespace compute = boost::compute;

BOOST_AUTO_TEST_CASE(adjacent_difference_int)
{
compute::vector<int> a(5, context);
using compute::int_;

compute::vector<int_> a(5, context);
compute::iota(a.begin(), a.end(), 0, queue);
CHECK_RANGE_EQUAL(int, 5, a, (0, 1, 2, 3, 4));
CHECK_RANGE_EQUAL(int_, 5, a, (0, 1, 2, 3, 4));

compute::vector<int> b(5, context);
compute::vector<int>::iterator iter =
compute::vector<int_> b(5, context);
compute::vector<int_>::iterator iter =
compute::adjacent_difference(a.begin(), a.end(), b.begin(), queue);
BOOST_CHECK(iter == b.end());
CHECK_RANGE_EQUAL(int, 5, b, (0, 1, 1, 1, 1));
CHECK_RANGE_EQUAL(int_, 5, b, (0, 1, 1, 1, 1));

int data[] = { 1, 9, 36, 48, 81 };
int_ data[] = { 1, 9, 36, 48, 81 };
compute::copy(data, data + 5, a.begin(), queue);
CHECK_RANGE_EQUAL(int, 5, a, (1, 9, 36, 48, 81));
CHECK_RANGE_EQUAL(int_, 5, a, (1, 9, 36, 48, 81));

iter = compute::adjacent_difference(a.begin(), a.end(), b.begin(), queue);
BOOST_CHECK(iter == b.end());
CHECK_RANGE_EQUAL(int, 5, b, (1, 8, 27, 12, 33));
CHECK_RANGE_EQUAL(int_, 5, b, (1, 8, 27, 12, 33));
}

// An empty input range (first == last) must be a no-op: the returned
// iterator equals the start of the output and the output is left unchanged.
BOOST_AUTO_TEST_CASE(adjacent_difference_first_eq_last)
{
    using compute::int_;
    typedef compute::vector<int_> int_vector;

    // input is all ones, output is all zeros
    int_vector a(size_t(5), int_(1), queue);
    int_vector b(size_t(5), int_(0), queue);

    // run the algorithm on the empty range [a.begin(), a.begin())
    int_vector::iterator iter = compute::adjacent_difference(
        a.begin(), a.begin(), b.begin(), queue
    );

    BOOST_CHECK(iter == b.begin());
    CHECK_RANGE_EQUAL(int_, 5, b, (0, 0, 0, 0, 0));
}

// In-place operation: the output iterator is the same as the start of the
// input range, and the differences must still be computed correctly.
BOOST_AUTO_TEST_CASE(adjacent_difference_first_eq_result)
{
    using compute::int_;
    typedef compute::vector<int_> int_vector;

    // fill the vector with 0, 1, 2, 3, 4
    int_vector a(5, context);
    compute::iota(a.begin(), a.end(), 0, queue);
    CHECK_RANGE_EQUAL(int_, 5, a, (0, 1, 2, 3, 4));

    // write the adjacent differences back over the input
    int_vector::iterator iter = compute::adjacent_difference(
        a.begin(), a.end(), a.begin(), queue
    );

    BOOST_CHECK(iter == a.end());
    CHECK_RANGE_EQUAL(int_, 5, a, (0, 1, 1, 1, 1));
}

BOOST_AUTO_TEST_CASE(all_same)
{
compute::vector<int> input(1000, context);
using compute::int_;

compute::vector<int_> input(1000, context);
compute::fill(input.begin(), input.end(), 42, queue);

compute::vector<int> output(input.size(), context);
compute::vector<int_> output(input.size(), context);

compute::adjacent_difference(
input.begin(), input.end(), output.begin(), queue
);

int first;
int_ first;
compute::copy_n(output.begin(), 1, &first, queue);
BOOST_CHECK_EQUAL(first, 42);

Expand Down