Add vex::eval() and atomic builtin functions.

Reviewer notes on this revision of the patch:
  * The text was reflowed from a whitespace-collapsed copy. Indentation is
    reconstructed, and angle-bracket arguments that were missing from added
    lines were restored from the way the names are used in this patch.
    NOTE(review): the <vexcl/operations.hpp> include in vexcl/eval.hpp and the
    order of the includes in tests/eval.cpp are inferred -- confirm against
    the original commit.
  * Context lines are kept as extracted (a few still lack their <...>
    arguments and original spacing), so hunks may need refreshing before the
    patch applies cleanly.
  * The "index" hashes are those of the original patch and do not describe
    the files as modified here.
  * The duplicated atomic_xchg registration was dropped, and the test lambda
    now captures C.

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 720819511..776285de5 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -67,6 +67,7 @@ add_vexcl_test(svm svm.cpp)
 add_vexcl_test(events events.cpp)
 add_vexcl_test(image image.cpp)
 add_vexcl_test(custom_kernel custom_kernel.cpp)
+add_vexcl_test(eval eval.cpp)
 add_vexcl_test(multiple_objects "dummy1.cpp;dummy2.cpp")
 
 if (NOT DEFINED ENV{APPVEYOR})
diff --git a/tests/eval.cpp b/tests/eval.cpp
new file mode 100644
index 000000000..db2865b3b
--- /dev/null
+++ b/tests/eval.cpp
@@ -0,0 +1,33 @@
+#define BOOST_TEST_MODULE Eval
+#include <boost/test/unit_test.hpp>
+#include <vexcl/vector.hpp>
+#include <vexcl/element_index.hpp>
+#include <vexcl/vector_view.hpp>
+#include <vexcl/function.hpp>
+#include <vexcl/eval.hpp>
+#include "context_setup.hpp"
+
+// x holds N = M * C indices taken modulo M, so every one of the M slots of y
+// is expected to receive C atomic increments and then C atomic decrements.
+BOOST_AUTO_TEST_CASE(eval_atomic) {
+    const size_t M = 16;
+    const size_t C = 64;
+    const size_t N = M * C;
+
+    std::vector<vex::backend::command_queue> q(1, ctx.queue(0));
+
+    vex::vector<int> x(q, N);
+    vex::vector<int> y(q, M);
+
+    y = 0;
+    x = vex::element_index() % M;
+
+    vex::eval(atomic_add(&vex::permutation(x)(y), 1));
+    // BOOST_CHECK_EQUAL binds its operands by reference, so C must be captured.
+    check_sample(y, [C](size_t, int v) { BOOST_CHECK_EQUAL(v, C); });
+
+    vex::eval(atomic_sub(&vex::permutation(x)(y), 1));
+    check_sample(y, [](size_t, int v) { BOOST_CHECK_EQUAL(v, 0); });
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/vexcl/eval.hpp b/vexcl/eval.hpp
new file mode 100644
index 000000000..cb90c6e5c
--- /dev/null
+++ b/vexcl/eval.hpp
@@ -0,0 +1,145 @@
+#ifndef VEXCL_EVAL_HPP
+#define VEXCL_EVAL_HPP
+
+/*
+The MIT License
+
+Copyright (c) 2012-2016 Denis Demidov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/**
+ * \file   vexcl/eval.hpp
+ * \author Denis Demidov
+ * \brief  Evaluate vector expression.
+ */
+
+#include <vexcl/operations.hpp>
+
+namespace vex {
+
+/// Evaluates a vector expression for its side effects.
+/**
+ * For every queue a kernel named "vexcl_eval_kernel" is generated on first
+ * use and kept in a function-local cache; the kernel evaluates \p expr as a
+ * bare statement inside the loop emitted by grid_stride_loop().
+ *
+ * \param expr  Expression to evaluate.
+ * \param queue Command queues to launch the kernel on.
+ * \param part  Partitioning: queue[d] processes part[d + 1] - part[d]
+ *              elements, so queue.size() + 1 entries are read.
+ */
+template <class Expr>
+void eval(const Expr &expr,
+        const std::vector<backend::command_queue> &queue,
+        const std::vector<size_t> &part
+        )
+{
+    using namespace vex::detail;
+
+#if (VEXCL_CHECK_SIZES > 0)
+    {
+        get_expression_properties prop;
+        extract_terminals()(boost::proto::as_child(expr), prop);
+
+        precondition(
+                prop.queue.empty() || prop.queue.size() == queue.size(),
+                "Incompatible queue lists"
+                );
+
+        precondition(
+                prop.size == 0 || prop.size == part.back(),
+                "Incompatible expression sizes"
+                );
+    }
+#endif
+    static kernel_cache cache;
+
+    for(unsigned d = 0; d < queue.size(); d++) {
+        auto kernel = cache.find(queue[d]);
+
+        backend::select_context(queue[d]);
+
+        if (kernel == cache.end()) {
+            backend::source_generator source(queue[d]);
+
+            output_terminal_preamble termpream(source, queue[d], "prm", empty_state());
+
+            boost::proto::eval(boost::proto::as_child(expr), termpream);
+
+            source.kernel("vexcl_eval_kernel")
+                .open("(")
+                .parameter<size_t>("n");
+
+            declare_expression_parameter declare(source, queue[d], "prm", empty_state());
+            extract_terminals()(boost::proto::as_child(expr), declare);
+
+            source.close(")")
+                .open("{")
+                .grid_stride_loop()
+                .open("{");
+
+            output_local_preamble loc_init(source, queue[d], "prm", empty_state());
+            boost::proto::eval(boost::proto::as_child(expr), loc_init);
+
+            source.new_line();
+            // The expression is emitted as a bare statement; its value is not stored.
+            vector_expr_context expr_ctx(source, queue[d], "prm", empty_state());
+            boost::proto::eval(boost::proto::as_child(expr), expr_ctx);
+            source << ";";
+            source.close("}").close("}");
+
+            kernel = cache.insert(queue[d], backend::kernel(
+                        queue[d], source.str(), "vexcl_eval_kernel"));
+        }
+
+        // Nothing is launched on a queue whose partition is empty.
+        if (size_t psize = part[d + 1] - part[d]) {
+            auto &K = kernel->second;
+            K.push_arg(psize);
+            set_expression_argument setarg(K, d, part[d], empty_state());
+            extract_terminals()( boost::proto::as_child(expr), setarg);
+            K(queue[d]);
+        }
+    }
+}
+
+/// Evaluates a vector expression on the queues found in its terminals.
+/**
+ * The queue list and partitioning are taken from the properties extracted
+ * from \p expr; the precondition fails when either of them is empty.
+ */
+template <class Expr>
+void eval(const Expr &expr) {
+    using namespace vex::detail;
+
+    get_expression_properties prop;
+    extract_terminals()(boost::proto::as_child(expr), prop);
+
+    precondition(!prop.queue.empty() && !prop.part.empty(),
+            "Can not determine expression size and queue list"
+            );
+
+    eval(expr, prop.queue, prop.part);
+}
+
+}
+
+#endif
diff --git a/vexcl/function.hpp b/vexcl/function.hpp
index d031e1ece..30b50bce2 100644
--- a/vexcl/function.hpp
+++ b/vexcl/function.hpp
@@ -265,6 +265,23 @@ rtype operator()(VEXCL_DUAL_FUNCTOR_ARGS(args)) const { \
 nargs, VEXCL_BUILTIN_PRINT_BOOST_REF, ~)); \
 }
 
+/// Declares function \p alias (taking \p nargs arguments) that builds a proto
+/// function expression tagged with a builtin whose name() is \p func.
+#define VEX_BUILTIN_FUNCTION_ALIAS(nargs, alias, func)                         \
+    struct func##_alias : vex::builtin_function {                              \
+        static const char *name() { return #func; }                            \
+    };                                                                         \
+    template <BOOST_PP_ENUM_PARAMS(nargs, class Arg)>                          \
+    typename boost::proto::result_of::make_expr<                               \
+        boost::proto::tag::function, func##_alias,                             \
+        BOOST_PP_ENUM_BINARY_PARAMS(nargs, const Arg,                          \
+                                    &BOOST_PP_INTERCEPT)>::type const          \
+    alias(BOOST_PP_ENUM_BINARY_PARAMS(nargs, const Arg, &arg)) {               \
+        return boost::proto::make_expr<boost::proto::tag::function>(           \
+            func##_alias(), BOOST_PP_ENUM(                                     \
+                                nargs, VEXCL_BUILTIN_PRINT_BOOST_REF, ~));     \
+    }
+
 /// \defgroup builtins Builtin device functions
 /** @{ */
 VEX_BUILTIN_FUNCTION( 2, abs_diff )
@@ -390,6 +407,51 @@ VEX_BUILTIN_FUNCTION( 1, tgamma )
 VEX_BUILTIN_FUNCTION( 1, trunc )
 VEX_BUILTIN_FUNCTION( 2, upsample )
 
+// Atomic functions
+#if defined(VEXCL_BACKEND_CUDA)
+
+VEX_BUILTIN_FUNCTION( 2, atomicAdd  )
+VEX_BUILTIN_FUNCTION( 2, atomicSub  )
+VEX_BUILTIN_FUNCTION( 2, atomicExch )
+VEX_BUILTIN_FUNCTION( 2, atomicMin  )
+VEX_BUILTIN_FUNCTION( 2, atomicMax  )
+VEX_BUILTIN_FUNCTION( 2, atomicInc  )
+VEX_BUILTIN_FUNCTION( 2, atomicDec  )
+VEX_BUILTIN_FUNCTION( 3, atomicCAS  )
+VEX_BUILTIN_FUNCTION( 2, atomicAnd  )
+VEX_BUILTIN_FUNCTION( 2, atomicOr   )
+VEX_BUILTIN_FUNCTION( 2, atomicXor  )
+
+// Also provide aliases for OpenCL-style functions
+// (atomicInc/atomicDec get none: they are registered with two arguments
+// here, while atomic_inc/atomic_dec below are registered with one).
+VEX_BUILTIN_FUNCTION_ALIAS(2, atomic_add,     atomicAdd  )
+VEX_BUILTIN_FUNCTION_ALIAS(2, atomic_sub,     atomicSub  )
+VEX_BUILTIN_FUNCTION_ALIAS(2, atomic_xchg,    atomicExch )
+VEX_BUILTIN_FUNCTION_ALIAS(2, atomic_min,     atomicMin  )
+VEX_BUILTIN_FUNCTION_ALIAS(2, atomic_max,     atomicMax  )
+VEX_BUILTIN_FUNCTION_ALIAS(3, atomic_cmpxchg, atomicCAS  )
+VEX_BUILTIN_FUNCTION_ALIAS(2, atomic_and,     atomicAnd  )
+VEX_BUILTIN_FUNCTION_ALIAS(2, atomic_or,      atomicOr   )
+VEX_BUILTIN_FUNCTION_ALIAS(2, atomic_xor,     atomicXor  )
+
+#else
+
+VEX_BUILTIN_FUNCTION(2, atomic_add     )
+VEX_BUILTIN_FUNCTION(2, atomic_sub     )
+VEX_BUILTIN_FUNCTION(2, atomic_xchg    )
+VEX_BUILTIN_FUNCTION(2, atomic_min     )
+VEX_BUILTIN_FUNCTION(2, atomic_max     )
+VEX_BUILTIN_FUNCTION(1, atomic_inc     )
+VEX_BUILTIN_FUNCTION(1, atomic_dec     )
+VEX_BUILTIN_FUNCTION(3, atomic_cmpxchg )
+VEX_BUILTIN_FUNCTION(2, atomic_and     )
+VEX_BUILTIN_FUNCTION(2, atomic_or      )
+VEX_BUILTIN_FUNCTION(2, atomic_xor     )
+
+#endif
+
+
 // Special case: abs() overloaded with floating point arguments should call
 // fabs in the OpenCL code
 struct abs_func : builtin_function {
@@ -398,7 +460,6 @@ struct abs_func : builtin_function {
 }
 };
 
-
 namespace detail {
 template struct return_type;
 }
diff --git a/vexcl/vexcl.hpp b/vexcl/vexcl.hpp
index 24915f298..fee314e6c 100644
--- a/vexcl/vexcl.hpp
+++ b/vexcl/vexcl.hpp
@@ -62,6 +62,7 @@ THE SOFTWARE.
 #include
 #include
 #include
+#include <vexcl/eval.hpp>
 
 #ifndef VEXCL_BACKEND_CUDA
 #include