Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions example/threefry_engine.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Muhammad Junaid Muzammil <mjunaidmuzammil@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://kylelutz.github.com/compute for more information.
//---------------------------------------------------------------------------//


#include <boost/compute/random/threefry_engine.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/context.hpp>
#include <boost/compute/device.hpp>
#include <boost/compute/system.hpp>
#include <iostream>

int main()
{
using boost::compute::uint_;
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
boost::compute::threefry_engine<> rng(queue);
boost::compute::vector<uint_> vector_ctr(20, context);

uint32_t ctr[20];
for(int i = 0; i < 10; i++) {
ctr[i*2] = i;
ctr[i*2+1] = 0;
}
boost::compute::copy(ctr, ctr+20, vector_ctr.begin(), queue);
rng.generate(vector_ctr.begin(), vector_ctr.end(), queue);
boost::compute::copy(vector_ctr.begin(), vector_ctr.end(), ctr, queue);

for(int i = 0; i < 10; i++) {
std::cout << std::hex << ctr[i*2] << " " << ctr[i*2+1] << std::endl;
}
return 0;
}

1 change: 1 addition & 0 deletions include/boost/compute/random.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <boost/compute/random/discrete_distribution.hpp>
#include <boost/compute/random/linear_congruential_engine.hpp>
#include <boost/compute/random/mersenne_twister_engine.hpp>
#include <boost/compute/random/threefry_engine.hpp>
#include <boost/compute/random/normal_distribution.hpp>
#include <boost/compute/random/uniform_int_distribution.hpp>
#include <boost/compute/random/uniform_real_distribution.hpp>
Expand Down
302 changes: 302 additions & 0 deletions include/boost/compute/random/threefry_engine.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,302 @@
// Added By: Muhammad Junaid Muzammil <mjunaidmuzammil@gmail.com>
// Copyright 2010-2012, D. E. Shaw Research.
// All rights reserved.

// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:

// * Redistributions of source code must retain the above copyright
// notice, this list of conditions, and the following disclaimer.

// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions, and the following disclaimer in the
// documentation and/or other materials provided with the distribution.

// * Neither the name of D. E. Shaw Research nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#ifndef BOOST_COMPUTE_RANDOM_THREEFRY_HPP
#define BOOST_COMPUTE_RANDOM_THREEFRY_HPP

#include <boost/compute/types.hpp>
#include <boost/compute/buffer.hpp>
#include <boost/compute/kernel.hpp>
#include <boost/compute/context.hpp>
#include <boost/compute/program.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/transform.hpp>
#include <boost/compute/detail/iterator_range_size.hpp>
#include <boost/compute/utility/program_cache.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/iterator/discard_iterator.hpp>

namespace boost {
namespace compute {

/// \class threefry_engine
/// \brief Threefry pseudorandom number generator.
template<class T = uint_>
class threefry_engine
{
public:
static const size_t threads = 1024;
typedef T result_type;

/// Creates a new threefry_engine and seeds it with \p value.
explicit threefry_engine(command_queue &queue)
: m_context(queue.get_context())
{
// setup program
load_program();
}

/// Creates a new threefry_engine object as a copy of \p other.
threefry_engine(const threefry_engine<T> &other)
: m_context(other.m_context),
m_program(other.m_program)
{
}

/// Copies \p other to \c *this.
threefry_engine<T>& operator=(const threefry_engine<T> &other)
{
if(this != &other){
m_context = other.m_context;
m_program = other.m_program;
}

return *this;
}

/// Destroys the threefry_engine object.
~threefry_engine()
{
}

private:
/// \internal_
void load_program()
{
boost::shared_ptr<program_cache> cache =
program_cache::get_global_cache(m_context);
std::string cache_key =
std::string("threefry_engine_32x2");

const char source[] =
"#define THREEFRY2x32_DEFAULT_ROUNDS 20\n"
"#define SKEIN_KS_PARITY_32 0x1BD11BDA\n"

"enum r123_enum_threefry32x2 {\n"
" R_32x2_0_0=13,\n"
" R_32x2_1_0=15,\n"
" R_32x2_2_0=26,\n"
" R_32x2_3_0= 6,\n"
" R_32x2_4_0=17,\n"
" R_32x2_5_0=29,\n"
" R_32x2_6_0=16,\n"
" R_32x2_7_0=24\n"
"};\n"

"static uint RotL_32(uint x, uint N)\n"
"{\n"
" return (x << (N & 31)) | (x >> ((32-N) & 31));\n"
"}\n"

"struct r123array2x32 {\n"
" uint v[2];\n"
"};\n"
"typedef struct r123array2x32 threefry2x32_ctr_t;\n"
"typedef struct r123array2x32 threefry2x32_key_t;\n"

"threefry2x32_ctr_t threefry2x32_R(unsigned int Nrounds, threefry2x32_ctr_t in, threefry2x32_key_t k)\n"
"{\n"
" threefry2x32_ctr_t X;\n"
" uint ks[3];\n"
" uint i; \n"
" ks[2] = SKEIN_KS_PARITY_32;\n"
" for (i=0;i < 2; i++) {\n"
" ks[i] = k.v[i];\n"
" X.v[i] = in.v[i];\n"
" ks[2] ^= k.v[i];\n"
" }\n"
" X.v[0] += ks[0]; X.v[1] += ks[1];\n"
" if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>3){\n"
" X.v[0] += ks[1]; X.v[1] += ks[2];\n"
" X.v[1] += 1;\n"
" }\n"
" if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>7){\n"
" X.v[0] += ks[2]; X.v[1] += ks[0];\n"
" X.v[1] += 2;\n"
" }\n"
" if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>11){\n"
" X.v[0] += ks[0]; X.v[1] += ks[1];\n"
" X.v[1] += 3;\n"
" }\n"
" if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>15){\n"
" X.v[0] += ks[1]; X.v[1] += ks[2];\n"
" X.v[1] += 4;\n"
" }\n"
" if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>19){\n"
" X.v[0] += ks[2]; X.v[1] += ks[0];\n"
" X.v[1] += 5;\n"
" }\n"
" if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>23){\n"
" X.v[0] += ks[0]; X.v[1] += ks[1];\n"
" X.v[1] += 6;\n"
" }\n"
" if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>27){\n"
" X.v[0] += ks[1]; X.v[1] += ks[2];\n"
" X.v[1] += 7;\n"
" }\n"
" if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
" if(Nrounds>31){\n"
" X.v[0] += ks[2]; X.v[1] += ks[0];\n"
" X.v[1] += 8;\n"
" }\n"
" return X;\n"
"}\n"

"__kernel void generate_rng(__global uint *ctr, __global uint *key, const uint offset) {\n"
" threefry2x32_ctr_t in;\n"
" threefry2x32_key_t k;\n"
" const uint i = get_global_id(0);\n"
" in.v[0] = ctr[2 * (offset + i)];\n"
" in.v[1] = ctr[2 * (offset + i) + 1];\n"
" k.v[0] = key[2 * (offset + i)];\n"
" k.v[1] = key[2 * (offset + i) + 1];\n"
" in = threefry2x32_R(20, in, k);\n"
" ctr[2 * (offset + i)] = in.v[0];\n"
" ctr[2 * (offset + i) + 1] = in.v[1];\n"
"}\n";

m_program = cache->get_or_build(cache_key, std::string(), source, m_context);
}

public:


/// Generates Threefry random numbers using both the counter and key values, and then stores
/// them to the range [\p first_ctr, \p last_ctr).
template<class OutputIterator>
void generate(OutputIterator first_ctr, OutputIterator last_ctr, OutputIterator first_key, OutputIterator last_key, command_queue &queue) {
const size_t size_ctr = detail::iterator_range_size(first_ctr, last_ctr);
const size_t size_key = detail::iterator_range_size(first_key, last_key);
if(!size_ctr || !size_key || (size_ctr != size_key)) {
return;
}
kernel rng_kernel = m_program.create_kernel("generate_rng");

rng_kernel.set_arg(0, first_ctr.get_buffer());
rng_kernel.set_arg(1, first_key.get_buffer());
size_t offset = 0;

for(;;){
size_t count = 0;
size_t size = size_ctr/2;
if(size > threads){
count = threads;
}
else {
count = size;
}
rng_kernel.set_arg(2, static_cast<const uint_>(offset));
queue.enqueue_1d_range_kernel(rng_kernel, 0, count, 0);

offset += count;

if(offset >= size){
break;
}

}
}

template<class OutputIterator>
void generate(OutputIterator first_ctr, OutputIterator last_ctr, command_queue &queue) {
const size_t size_ctr = detail::iterator_range_size(first_ctr, last_ctr);
if(!size_ctr) {
return;
}
boost::compute::vector<uint_> vector_key(size_ctr, m_context);
vector_key.assign(size_ctr, 0, queue);
kernel rng_kernel = m_program.create_kernel("generate_rng");

rng_kernel.set_arg(0, first_ctr.get_buffer());
rng_kernel.set_arg(1, vector_key);
size_t offset = 0;

for(;;){
size_t count = 0;
size_t size = size_ctr/2;
if(size > threads){
count = threads;
}
else {
count = size;
}
rng_kernel.set_arg(2, static_cast<const uint_>(offset));
queue.enqueue_1d_range_kernel(rng_kernel, 0, count, 0);

offset += count;

if(offset >= size){
break;
}

}
}
private:
context m_context;
program m_program;
};

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_RANDOM_THREEFRY_HPP
3 changes: 3 additions & 0 deletions perf/perf_random_number_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ int main(int argc, char *argv[])
else if(engine == "linear_congruential_engine"){
perf_random_number_engine<compute::linear_congruential_engine<> >(size, trials, queue);
}
else if(engine == "threefry_engine"){
perf_random_number_engine<compute::threefry_engine<> >(size, trials, queue);
}
else {
std::cerr << "error: unknown random number engine '" << engine << "'" << std::endl;
return -1;
Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ add_compute_test("random.bernoulli_distribution" test_bernoulli_distribution.cpp
add_compute_test("random.discrete_distribution" test_discrete_distribution.cpp)
add_compute_test("random.linear_congruential_engine" test_linear_congruential_engine.cpp)
add_compute_test("random.mersenne_twister_engine" test_mersenne_twister_engine.cpp)
add_compute_test("random.threefry_engine" test_threefry_engine.cpp)
add_compute_test("random.normal_distribution" test_normal_distribution.cpp)
add_compute_test("random.uniform_int_distribution" test_uniform_int_distribution.cpp)
add_compute_test("random.uniform_real_distribution" test_uniform_real_distribution.cpp)
Expand Down
Loading