diff --git a/example/threefry_engine.cpp b/example/threefry_engine.cpp new file mode 100644 index 000000000..5cbf6c8e0 --- /dev/null +++ b/example/threefry_engine.cpp @@ -0,0 +1,43 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Muhammad Junaid Muzammil +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://kylelutz.github.com/compute for more information. +//---------------------------------------------------------------------------// + + +#include +#include +#include +#include +#include +#include +#include + +int main() +{ + using boost::compute::uint_; + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + boost::compute::threefry_engine<> rng(queue); + boost::compute::vector vector_ctr(20, context); + + uint32_t ctr[20]; + for(int i = 0; i < 10; i++) { + ctr[i*2] = i; + ctr[i*2+1] = 0; + } + boost::compute::copy(ctr, ctr+20, vector_ctr.begin(), queue); + rng.generate(vector_ctr.begin(), vector_ctr.end(), queue); + boost::compute::copy(vector_ctr.begin(), vector_ctr.end(), ctr, queue); + + for(int i = 0; i < 10; i++) { + std::cout << std::hex << ctr[i*2] << " " << ctr[i*2+1] << std::endl; + } + return 0; +} + diff --git a/include/boost/compute/random.hpp b/include/boost/compute/random.hpp index b0c157dc3..1a361fe48 100644 --- a/include/boost/compute/random.hpp +++ b/include/boost/compute/random.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/include/boost/compute/random/threefry_engine.hpp b/include/boost/compute/random/threefry_engine.hpp new file mode 100644 index 000000000..97af34d08 --- /dev/null +++ b/include/boost/compute/random/threefry_engine.hpp @@ -0,0 +1,302 @@ +// Added By: Muhammad Junaid Muzammil +// Copyright 2010-2012, D. E. Shaw Research. +// All rights reserved. + +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: + +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions, and the following disclaimer. + +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions, and the following disclaimer in the +// documentation and/or other materials provided with the distribution. + +// * Neither the name of D. E. Shaw Research nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. + +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#ifndef BOOST_COMPUTE_RANDOM_THREEFRY_HPP +#define BOOST_COMPUTE_RANDOM_THREEFRY_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost { +namespace compute { + +/// \class threefry_engine +/// \brief Threefry pseudorandom number generator. +template +class threefry_engine +{ +public: + static const size_t threads = 1024; + typedef T result_type; + + /// Creates a new threefry_engine and seeds it with \p value. + explicit threefry_engine(command_queue &queue) + : m_context(queue.get_context()) + { + // setup program + load_program(); + } + + /// Creates a new threefry_engine object as a copy of \p other. + threefry_engine(const threefry_engine &other) + : m_context(other.m_context), + m_program(other.m_program) + { + } + + /// Copies \p other to \c *this. + threefry_engine& operator=(const threefry_engine &other) + { + if(this != &other){ + m_context = other.m_context; + m_program = other.m_program; + } + + return *this; + } + + /// Destroys the threefry_engine object. + ~threefry_engine() + { + } + +private: + /// \internal_ + void load_program() + { + boost::shared_ptr cache = + program_cache::get_global_cache(m_context); + std::string cache_key = + std::string("threefry_engine_32x2"); + + const char source[] = + "#define THREEFRY2x32_DEFAULT_ROUNDS 20\n" + "#define SKEIN_KS_PARITY_32 0x1BD11BDA\n" + + "enum r123_enum_threefry32x2 {\n" + " R_32x2_0_0=13,\n" + " R_32x2_1_0=15,\n" + " R_32x2_2_0=26,\n" + " R_32x2_3_0= 6,\n" + " R_32x2_4_0=17,\n" + " R_32x2_5_0=29,\n" + " R_32x2_6_0=16,\n" + " R_32x2_7_0=24\n" + "};\n" + + "static uint RotL_32(uint x, uint N)\n" + "{\n" + " return (x << (N & 31)) | (x >> ((32-N) & 31));\n" + "}\n" + + "struct r123array2x32 {\n" + " uint v[2];\n" + "};\n" + "typedef struct r123array2x32 threefry2x32_ctr_t;\n" + "typedef struct r123array2x32 threefry2x32_key_t;\n" + + "threefry2x32_ctr_t threefry2x32_R(unsigned int Nrounds, threefry2x32_ctr_t in, threefry2x32_key_t k)\n" + "{\n" + " threefry2x32_ctr_t X;\n" + " uint ks[3];\n" + " uint i; \n" + " ks[2] = SKEIN_KS_PARITY_32;\n" + " for (i=0;i < 2; i++) {\n" + " ks[i] = k.v[i];\n" + " X.v[i] = in.v[i];\n" + " ks[2] ^= k.v[i];\n" + " }\n" + " X.v[0] += ks[0]; X.v[1] += ks[1];\n" + " if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>3){\n" + " X.v[0] += ks[1]; X.v[1] += ks[2];\n" + " X.v[1] += 1;\n" + " }\n" + " if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>7){\n" + " X.v[0] += ks[2]; X.v[1] += ks[0];\n" + " X.v[1] += 2;\n" + " }\n" + " if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>11){\n" + " X.v[0] += ks[0]; X.v[1] += ks[1];\n" + " X.v[1] += 3;\n" + " }\n" + " if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>15){\n" + " X.v[0] += ks[1]; X.v[1] += ks[2];\n" + " X.v[1] += 4;\n" + " }\n" + " if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>19){\n" + " X.v[0] += ks[2]; X.v[1] += ks[0];\n" + " X.v[1] += 5;\n" + " }\n" + " if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>23){\n" + " X.v[0] += ks[0]; X.v[1] += ks[1];\n" + " X.v[1] += 6;\n" + " }\n" + " if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>27){\n" + " X.v[0] += ks[1]; X.v[1] += ks[2];\n" + " X.v[1] += 7;\n" + " }\n" + " if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>31){\n" + " X.v[0] += ks[2]; X.v[1] += ks[0];\n" + " X.v[1] += 8;\n" + " }\n" + " return X;\n" + "}\n" + + "__kernel void generate_rng(__global uint *ctr, __global uint *key, const uint offset) {\n" + " threefry2x32_ctr_t in;\n" + " threefry2x32_key_t k;\n" + " const uint i = get_global_id(0);\n" + " in.v[0] = ctr[2 * (offset + i)];\n" + " in.v[1] = ctr[2 * (offset + i) + 1];\n" + " k.v[0] = key[2 * (offset + i)];\n" + " k.v[1] = key[2 * (offset + i) + 1];\n" + " in = threefry2x32_R(20, in, k);\n" + " ctr[2 * (offset + i)] = in.v[0];\n" + " ctr[2 * (offset + i) + 1] = in.v[1];\n" + "}\n"; + + m_program = cache->get_or_build(cache_key, std::string(), source, m_context); + } + +public: + + + /// Generates Threefry random numbers using both the counter and key values, and then stores + /// them to the range [\p first_ctr, \p last_ctr). + template + void generate(OutputIterator first_ctr, OutputIterator last_ctr, OutputIterator first_key, OutputIterator last_key, command_queue &queue) { + const size_t size_ctr = detail::iterator_range_size(first_ctr, last_ctr); + const size_t size_key = detail::iterator_range_size(first_key, last_key); + if(!size_ctr || !size_key || (size_ctr != size_key)) { + return; + } + kernel rng_kernel = m_program.create_kernel("generate_rng"); + + rng_kernel.set_arg(0, first_ctr.get_buffer()); + rng_kernel.set_arg(1, first_key.get_buffer()); + size_t offset = 0; + + for(;;){ + size_t count = 0; + size_t size = size_ctr/2; + if(size > threads){ + count = threads; + } + else { + count = size; + } + rng_kernel.set_arg(2, static_cast(offset)); + queue.enqueue_1d_range_kernel(rng_kernel, 0, count, 0); + + offset += count; + + if(offset >= size){ + break; + } + + } + } + + template + void generate(OutputIterator first_ctr, OutputIterator last_ctr, command_queue &queue) { + const size_t size_ctr = detail::iterator_range_size(first_ctr, last_ctr); + if(!size_ctr) { + return; + } + boost::compute::vector vector_key(size_ctr, m_context); + vector_key.assign(size_ctr, 0, queue); + kernel rng_kernel = m_program.create_kernel("generate_rng"); + + rng_kernel.set_arg(0, first_ctr.get_buffer()); + rng_kernel.set_arg(1, vector_key); + size_t offset = 0; + + for(;;){ + size_t count = 0; + size_t size = size_ctr/2; + if(size > threads){ + count = threads; + } + else { + count = size; + } + rng_kernel.set_arg(2, static_cast(offset)); + queue.enqueue_1d_range_kernel(rng_kernel, 0, count, 0); + + offset += count; + + if(offset >= size){ + break; + } + + } + } +private: + context m_context; + program m_program; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_RANDOM_THREEFRY_HPP diff --git a/perf/perf_random_number_engine.cpp b/perf/perf_random_number_engine.cpp index 49cdaab06..db25d437e 100644 --- a/perf/perf_random_number_engine.cpp +++ b/perf/perf_random_number_engine.cpp @@ -89,6 +89,9 @@ int main(int argc, char *argv[]) else if(engine == "linear_congruential_engine"){ perf_random_number_engine >(size, trials, queue); } + else if(engine == "threefry_engine"){ + perf_random_number_engine >(size, trials, queue); + } else { std::cerr << "error: unknown random number engine '" << engine << "'" << std::endl; return -1; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 934fbe2e2..ff2532cbd 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -175,6 +175,7 @@ add_compute_test("random.bernoulli_distribution" test_bernoulli_distribution.cpp add_compute_test("random.discrete_distribution" test_discrete_distribution.cpp) add_compute_test("random.linear_congruential_engine" test_linear_congruential_engine.cpp) add_compute_test("random.mersenne_twister_engine" test_mersenne_twister_engine.cpp) +add_compute_test("random.threefry_engine" test_threefry_engine.cpp) add_compute_test("random.normal_distribution" test_normal_distribution.cpp) add_compute_test("random.uniform_int_distribution" test_uniform_int_distribution.cpp) add_compute_test("random.uniform_real_distribution" test_uniform_real_distribution.cpp) diff --git a/test/test_threefry_engine.cpp b/test/test_threefry_engine.cpp new file mode 100644 index 000000000..e70405639 --- /dev/null +++ b/test/test_threefry_engine.cpp @@ -0,0 +1,63 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Muhammad Junaid Muzammil +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://kylelutz.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#define BOOST_TEST_MODULE TestThreefry +#include + +#include +#include + +#include "check_macros.hpp" +#include "context_setup.hpp" + +BOOST_AUTO_TEST_CASE(generate_uint) +{ + + using boost::compute::uint_; + + boost::compute::threefry_engine<> rng(queue); + + boost::compute::vector vector_ctr(20, context); + + uint32_t ctr[20]; + for(int i = 0; i < 10; i++) { + ctr[i*2] = i; + ctr[i*2+1] = 0; + } + + boost::compute::copy(ctr, ctr+20, vector_ctr.begin(), queue); + + rng.generate(vector_ctr.begin(), vector_ctr.end(), queue); + CHECK_RANGE_EQUAL( + uint_, 20, vector_ctr, + (uint_(0x6b200159), + uint_(0x99ba4efe), + uint_(0x508efb2c), + uint_(0xc0de3f32), + uint_(0x64a626ec), + uint_(0xfc15e573), + uint_(0xb8abc4d1), + uint_(0x537eb86), + uint_(0xac6dc2bb), + uint_(0xa7adb3c3), + uint_(0x5641e094), + uint_(0xe4ab4fd), + uint_(0xa53c1ce9), + uint_(0xabcf1dba), + uint_(0x2677a25a), + uint_(0x76cf5efc), + uint_(0x2d08247f), + uint_(0x815480f1), + uint_(0x2d1fa53a), + uint_(0xdfe8514c)) + ); +} + +BOOST_AUTO_TEST_SUITE_END()