From af7b197e6821399c79fb4d0f7222b557b6d12aba Mon Sep 17 00:00:00 2001
From: Jakub Szuppe <j.szuppe@gmail.com>
Date: Sat, 30 Apr 2016 18:10:24 +0200
Subject: [PATCH 1/8] Skip matrix transpose example on Apple CPU devices

---
 example/matrix_transpose.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/example/matrix_transpose.cpp b/example/matrix_transpose.cpp
index 4edc76d71..ee9b1e9d7 100644
--- a/example/matrix_transpose.cpp
+++ b/example/matrix_transpose.cpp
@@ -251,6 +251,14 @@ int main(int argc, char *argv[])
     std::cout << "Local Size: " << TILE_DIM << "x" << BLOCK_ROWS << " threads" << std::endl;
     std::cout << std::endl;
 
+    // On OSX this example does not work on CPU devices
+    #if defined(__APPLE__)
+    if(device.type() & compute::device::cpu) {
+        std::cout << "On OSX this example does not work on CPU devices" << std::endl;
+        return 0;
+    }
+    #endif
+
     const size_t global_work_size[2] = {rows, cols*BLOCK_ROWS/TILE_DIM};
     const size_t local_work_size[2] = {TILE_DIM, BLOCK_ROWS};
 

From 968056c6535214c86c253ed7cb6e256ae8e41d22 Mon Sep 17 00:00:00 2001
From: Jakub Szuppe <j.szuppe@gmail.com>
Date: Sat, 30 Apr 2016 19:32:52 +0200
Subject: [PATCH 2/8] Adapt command queue test to different devices

Not every device allows for local work group size to be
greater than [1;1;1].
---
 test/test_command_queue.cpp | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/test/test_command_queue.cpp b/test/test_command_queue.cpp
index 86c4f4c9a..ff4973b58 100644
--- a/test/test_command_queue.cpp
+++ b/test/test_command_queue.cpp
@@ -283,6 +283,29 @@ BOOST_AUTO_TEST_CASE(enqueue_kernel_with_extents)
     kernel.set_arg(0, output1);
     kernel.set_arg(1, output2);
 
+    queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(1, 1));
+
+    CHECK_RANGE_EQUAL(int, 4, output1, (0, 0, 0, 0));
+    CHECK_RANGE_EQUAL(int, 4, output2, (0, 0, 0, 0));
+
+    // Maximum number of work-items that can be specified in each
+    // dimension of the work-group to clEnqueueNDRangeKernel.
+    std::vector<size_t> max_work_item_sizes =
+        device.get_info<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
+
+    if(max_work_item_sizes[0] < size_t(2)) {
+        return;
+    }
+
+    queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(2, 1));
+
+    CHECK_RANGE_EQUAL(int, 4, output1, (0, 1, 0, 1));
+    CHECK_RANGE_EQUAL(int, 4, output2, (0, 0, 0, 0));
+
+    if(max_work_item_sizes[1] < size_t(2)) {
+        return;
+    }
+
     queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(2, 2));
 
     CHECK_RANGE_EQUAL(int, 4, output1, (0, 1, 0, 1));

From e1993047808c4495999972b638ac5ae478548ac8 Mon Sep 17 00:00:00 2001
From: Jakub Szuppe <j.szuppe@gmail.com>
Date: Sat, 30 Apr 2016 19:33:36 +0200
Subject: [PATCH 3/8] Turn off inplace_reduce and radix_sort tests on Apple

Tests for inplace_reduce, radix_sort and radix_sort_by_key are disabled
for CPU devices on Apple platform because for CPU devices on Apple
platform when local memory is used in a kernel, local work group size
must be [1;1;1]. Those algorithms are designed for GPU devices,
therefore we do no lose any functionality.
---
 test/quirks.hpp                 | 17 +++++++
 test/test_inplace_reduce.cpp    | 17 +++++++
 test/test_radix_sort.cpp        | 89 +++++++++++++++++++++++++++++++++
 test/test_radix_sort_by_key.cpp | 37 ++++++++++++++
 4 files changed, 160 insertions(+)

diff --git a/test/quirks.hpp b/test/quirks.hpp
index 642a4f548..2288540be 100644
--- a/test/quirks.hpp
+++ b/test/quirks.hpp
@@ -25,6 +25,12 @@ inline bool is_pocl_device(const boost::compute::device &device)
     return device.platform().name() == "Portable Computing Language";
 }
 
+// returns true if the device is from Apple OpenCL platform
+inline bool is_apple_device(const boost::compute::device &device)
+{
+    return device.platform().name() == "Apple";
+}
+
 // AMD platforms have a bug when using struct assignment. this affects
 // algorithms like fill() when used with pairs/tuples.
 //
@@ -43,6 +49,17 @@ inline bool bug_in_svmmemcpy(const boost::compute::device &device)
     return boost::compute::detail::is_amd_device(device);
 }
 
+// For CPU devices on Apple platform local memory can not be used when work
+// group size is not [1;1;1]. If work group size is greater "Invalid Work Group
+// Size" error is thrown. (Apple OpenCL implementation can sometimes reduce
+// max work group size for other reasons.)
+// When local memory is not used max work group size for CPU devices on Apple
+// platform should be [1024;1;1].
+inline bool is_apple_cpu_device(const boost::compute::device &device)
+{
+    return is_apple_device(device) && (device.type() & ::boost::compute::device::cpu);
+}
+
 // returns true if the device supports image samplers.
 inline bool supports_image_samplers(const boost::compute::device &device)
 {
diff --git a/test/test_inplace_reduce.cpp b/test/test_inplace_reduce.cpp
index 35b7acca9..37b1f927f 100644
--- a/test/test_inplace_reduce.cpp
+++ b/test/test_inplace_reduce.cpp
@@ -17,10 +17,19 @@
 #include <boost/compute/algorithm/detail/inplace_reduce.hpp>
 #include <boost/compute/container/vector.hpp>
 
+#include "quirks.hpp"
 #include "context_setup.hpp"
 
 BOOST_AUTO_TEST_CASE(sum_int)
 {
+    if(is_apple_cpu_device(device)) {
+        std::cerr
+            << "skipping all inplace_reduce tests due to Apple platform"
+            << " behavior when local memory is used on a CPU device"
+            << std::endl;
+        return;
+    }
+
     int data[] = { 1, 5, 3, 4, 9, 3, 5, 3 };
     boost::compute::vector<int> vector(data, data + 8, queue);
 
@@ -43,6 +52,10 @@ BOOST_AUTO_TEST_CASE(sum_int)
 
 BOOST_AUTO_TEST_CASE(multiply_int)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     int data[] = { 1, 5, 3, 4, 9, 3, 5, 3 };
     boost::compute::vector<int> vector(data, data + 8, queue);
 
@@ -65,6 +78,10 @@ BOOST_AUTO_TEST_CASE(multiply_int)
 
 BOOST_AUTO_TEST_CASE(reduce_iota)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     // 1 value
     boost::compute::vector<int> vector(1, context);
     boost::compute::iota(vector.begin(), vector.end(), int(0), queue);
diff --git a/test/test_radix_sort.cpp b/test/test_radix_sort.cpp
index c22845f71..08e3b425f 100644
--- a/test/test_radix_sort.cpp
+++ b/test/test_radix_sort.cpp
@@ -16,6 +16,7 @@
 #include <boost/compute/algorithm/detail/radix_sort.hpp>
 #include <boost/compute/container/vector.hpp>
 
+#include "quirks.hpp"
 #include "check_macros.hpp"
 #include "context_setup.hpp"
 
@@ -25,6 +26,14 @@ const bool descending = false;
 
 BOOST_AUTO_TEST_CASE(sort_char_vector)
 {
+    if(is_apple_cpu_device(device)) {
+        std::cerr
+            << "skipping all radix_sort tests due to Apple platform"
+            << " behavior when local memory is used on a CPU device"
+            << std::endl;
+        return;
+    }
+
     using boost::compute::char_;
 
     char_ data[] = { 'c', 'a', '0', '7', 'B', 'F', '\0', '$' };
@@ -39,6 +48,10 @@ BOOST_AUTO_TEST_CASE(sort_char_vector)
 
 BOOST_AUTO_TEST_CASE(sort_uchar_vector)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     using boost::compute::uchar_;
 
     uchar_ data[] = { 0x12, 0x00, 0xFF, 0xB4, 0x80, 0x32, 0x64, 0xA2 };
@@ -53,6 +66,10 @@ BOOST_AUTO_TEST_CASE(sort_uchar_vector)
 
 BOOST_AUTO_TEST_CASE(sort_short_vector)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     using boost::compute::short_;
 
     short_ data[] = { -4, 152, -94, 963, 31002, -456, 0, -2113 };
@@ -67,6 +84,10 @@ BOOST_AUTO_TEST_CASE(sort_short_vector)
 
 BOOST_AUTO_TEST_CASE(sort_ushort_vector)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     using boost::compute::ushort_;
 
     ushort_ data[] = { 4, 152, 94, 963, 63202, 34560, 0, 2113 };
@@ -81,6 +102,10 @@ BOOST_AUTO_TEST_CASE(sort_ushort_vector)
 
 BOOST_AUTO_TEST_CASE(sort_int_vector)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     int data[] = { -4, 152, -5000, 963, 75321, -456, 0, 1112 };
     boost::compute::vector<int> vector(data, data + 8, queue);
     BOOST_CHECK_EQUAL(vector.size(), size_t(8));
@@ -93,6 +118,10 @@ BOOST_AUTO_TEST_CASE(sort_int_vector)
 
 BOOST_AUTO_TEST_CASE(sort_uint_vector)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     using boost::compute::uint_;
 
     uint_ data[] = { 500, 1988, 123456, 562, 0, 4000000, 9852, 102030 };
@@ -107,6 +136,10 @@ BOOST_AUTO_TEST_CASE(sort_uint_vector)
 
 BOOST_AUTO_TEST_CASE(sort_long_vector)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     using boost::compute::long_;
 
     long_ data[] = { 500, 1988, 123456, 562, 0, 4000000, 9852, 102030 };
@@ -121,6 +154,10 @@ BOOST_AUTO_TEST_CASE(sort_long_vector)
 
 BOOST_AUTO_TEST_CASE(sort_ulong_vector)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     using boost::compute::ulong_;
 
     ulong_ data[] = { 500, 1988, 123456, 562, 0, 4000000, 9852, 102030 };
@@ -135,6 +172,10 @@ BOOST_AUTO_TEST_CASE(sort_ulong_vector)
 
 BOOST_AUTO_TEST_CASE(sort_float_vector)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     float data[] = { -6023.0f, 152.5f, -63.0f, 1234567.0f, 11.2f,
                      -5000.1f, 0.0f, 14.0f, -8.25f, -0.0f };
     boost::compute::vector<float> vector(data, data + 10, queue);
@@ -160,6 +201,10 @@ BOOST_AUTO_TEST_CASE(sort_float_vector)
 
 BOOST_AUTO_TEST_CASE(sort_double_vector)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     if(!device.supports_extension("cl_khr_fp64")){
         std::cout << "skipping test: device does not support double" << std::endl;
         return;
@@ -181,6 +226,10 @@ BOOST_AUTO_TEST_CASE(sort_double_vector)
 
 BOOST_AUTO_TEST_CASE(sort_char_vector_desc)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     using boost::compute::char_;
 
     char_ data[] = { 'c', 'a', '0', '7', 'B', 'F', '\0', '$' };
@@ -205,6 +254,10 @@ BOOST_AUTO_TEST_CASE(sort_char_vector_desc)
 
 BOOST_AUTO_TEST_CASE(sort_uchar_vector_desc)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     using boost::compute::uchar_;
 
     uchar_ data[] = { 0x12, 0x00, 0xFF, 0xB4, 0x80, 0x32, 0x64, 0xA2 };
@@ -229,6 +282,10 @@ BOOST_AUTO_TEST_CASE(sort_uchar_vector_desc)
 
 BOOST_AUTO_TEST_CASE(sort_short_vector_desc)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     using boost::compute::short_;
 
     short_ data[] = { -4, 152, -94, 963, 31002, -456, 0, -2113 };
@@ -253,6 +310,10 @@ BOOST_AUTO_TEST_CASE(sort_short_vector_desc)
 
 BOOST_AUTO_TEST_CASE(sort_ushort_vector_desc)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     using boost::compute::ushort_;
 
     ushort_ data[] = { 4, 152, 94, 963, 63202, 34560, 0, 2113 };
@@ -277,6 +338,10 @@ BOOST_AUTO_TEST_CASE(sort_ushort_vector_desc)
 
 BOOST_AUTO_TEST_CASE(sort_int_vector_desc)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     using boost::compute::int_;
 
     int_ data[] = { -4, 152, -5000, 963, 75321, -456, 0, 1112 };
@@ -301,6 +366,10 @@ BOOST_AUTO_TEST_CASE(sort_int_vector_desc)
 
 BOOST_AUTO_TEST_CASE(sort_uint_vector_desc)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     using boost::compute::uint_;
 
     uint_ data[] = { 500, 1988, 123456, 562, 0, 4000000, 9852, 102030 };
@@ -325,6 +394,10 @@ BOOST_AUTO_TEST_CASE(sort_uint_vector_desc)
 
 BOOST_AUTO_TEST_CASE(sort_long_vector_desc)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     using boost::compute::long_;
 
     long_ data[] = { -500, 1988, 123456, 562, 0, 4000000, 9852, 102030 };
@@ -349,6 +422,10 @@ BOOST_AUTO_TEST_CASE(sort_long_vector_desc)
 
 BOOST_AUTO_TEST_CASE(sort_ulong_vector_desc)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     using boost::compute::ulong_;
 
     ulong_ data[] = { 500, 1988, 123456, 562, 0, 4000000, 9852, 102030 };
@@ -373,6 +450,10 @@ BOOST_AUTO_TEST_CASE(sort_ulong_vector_desc)
 
 BOOST_AUTO_TEST_CASE(sort_float_vector_desc)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     float data[] = {
         -6023.0f, 152.5f, -63.0f, 1234567.0f, 11.2f,
         -5000.1f, 0.0f, 14.0f, -8.25f, -0.0f
@@ -412,6 +493,10 @@ BOOST_AUTO_TEST_CASE(sort_float_vector_desc)
 
 BOOST_AUTO_TEST_CASE(sort_double_vector_desc)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     if(!device.supports_extension("cl_khr_fp64")){
         std::cout << "skipping test: device does not support double" << std::endl;
         return;
@@ -442,6 +527,10 @@ BOOST_AUTO_TEST_CASE(sort_double_vector_desc)
 
 BOOST_AUTO_TEST_CASE(sort_partial_vector)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     int data[] = { 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
     boost::compute::vector<int> vec(data, data + 10, queue);
 
diff --git a/test/test_radix_sort_by_key.cpp b/test/test_radix_sort_by_key.cpp
index e3c14f08a..25f955759 100644
--- a/test/test_radix_sort_by_key.cpp
+++ b/test/test_radix_sort_by_key.cpp
@@ -16,6 +16,7 @@
 #include <boost/compute/algorithm/detail/radix_sort.hpp>
 #include <boost/compute/container/vector.hpp>
 
+#include "quirks.hpp"
 #include "check_macros.hpp"
 #include "context_setup.hpp"
 
@@ -26,6 +27,14 @@ const bool descending = false;
 // radix_sort_by_key should be stable
 BOOST_AUTO_TEST_CASE(stable_radix_sort_int_by_int)
 {
+    if(is_apple_cpu_device(device)) {
+        std::cerr
+            << "skipping all radix_sort_by_key tests due to Apple platform"
+            << " behavior when local memory is used on a CPU device"
+            << std::endl;
+        return;
+    }
+
     compute::int_ keys_data[] =   { 10, 9, 2, 7, 6, -1, 4, 2, 2, 10 };
     compute::int_ values_data[] = { 1,  2, 3, 4, 5,  6, 7, 8, 9, 10 };
 
@@ -51,6 +60,10 @@ BOOST_AUTO_TEST_CASE(stable_radix_sort_int_by_int)
 // radix_sort_by_key should be stable
 BOOST_AUTO_TEST_CASE(stable_radix_sort_int_by_int_desc)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     compute::int_ keys_data[] =   { 10, 9, 2, 7, 6, -1, 4, 2, 2, 10 };
     compute::int_ values_data[] = { 1,  2, 3, 4, 5,  6, 7, 8, 9, 10 };
 
@@ -86,6 +99,10 @@ BOOST_AUTO_TEST_CASE(stable_radix_sort_int_by_int_desc)
 // radix_sort_by_key should be stable
 BOOST_AUTO_TEST_CASE(stable_radix_sort_uint_by_uint)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     compute::uint_ keys_data[] =   { 10, 9, 2, 7, 6, 1, 4, 2, 2, 10 };
     compute::uint_ values_data[] = { 1,  2, 3, 4, 5, 6, 7, 8, 9, 10 };
 
@@ -111,6 +128,10 @@ BOOST_AUTO_TEST_CASE(stable_radix_sort_uint_by_uint)
 // radix_sort_by_key should be stable
 BOOST_AUTO_TEST_CASE(stable_radix_sort_uint_by_uint_desc)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     compute::uint_ keys_data[] =   { 10, 9, 2, 7, 6, 1, 4, 2, 2, 10 };
     compute::uint_ values_data[] = { 1,  2, 3, 4, 5, 6, 7, 8, 9, 10 };
 
@@ -147,6 +168,10 @@ BOOST_AUTO_TEST_CASE(stable_radix_sort_uint_by_uint_desc)
 // radix_sort_by_key should be stable
 BOOST_AUTO_TEST_CASE(stable_radix_sort_int_by_float)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     compute::float_ keys_data[]   = { 10., 5.5, 10., 7., 5.5};
     compute::int_   values_data[] = {   1, 200, -10,  2, 4 };
 
@@ -172,6 +197,10 @@ BOOST_AUTO_TEST_CASE(stable_radix_sort_int_by_float)
 // radix_sort_by_key should be stable
 BOOST_AUTO_TEST_CASE(stable_radix_sort_int_by_float_desc)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     compute::float_ keys_data[]   = { 10., 5.5, 10., 7., 5.5};
     compute::int_   values_data[] = {   1, 200, -10,  2, 4 };
 
@@ -208,6 +237,10 @@ BOOST_AUTO_TEST_CASE(stable_radix_sort_int_by_float_desc)
 // radix_sort_by_key should be stable
 BOOST_AUTO_TEST_CASE(stable_radix_sort_char_by_int)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     compute::int_ keys_data[] = { 6, 1, 1, 3, 4, 7, 5, 1 };
     compute::char_ values_data[] = { 'g', 'c', 'b', 'd', 'e', 'h', 'f', 'a' };
 
@@ -231,6 +264,10 @@ BOOST_AUTO_TEST_CASE(stable_radix_sort_char_by_int)
 // radix_sort_by_key should be stable
 BOOST_AUTO_TEST_CASE(stable_radix_sort_int2_by_int)
 {
+    if(is_apple_cpu_device(device)) {
+        return;
+    }
+
     compute::int_ keys_data[] = { 6, 1, 1, 3, 4, 7, 5, 1 };
     compute::int2_ values_data[] = {
         compute::int2_(1, 1), // 6

From 6412cb2cc6e9a9c2bb47fc173ba47bbcf1d7634c Mon Sep 17 00:00:00 2001
From: Jakub Szuppe <j.szuppe@gmail.com>
Date: Tue, 3 May 2016 13:23:48 +0200
Subject: [PATCH 4/8] Fix typos in test_lambda.cpp

---
 test/test_lambda.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/test_lambda.cpp b/test/test_lambda.cpp
index d87d1a8e7..03ffff454 100644
--- a/test/test_lambda.cpp
+++ b/test/test_lambda.cpp
@@ -307,7 +307,7 @@ BOOST_AUTO_TEST_CASE(lambda_get_tuple)
     vector.push_back(boost::make_tuple(5, 'c', 5.6f), queue);
     vector.push_back(boost::make_tuple(7, 'd', 7.8f), queue);
 
-    // extract first compoenent of each tuple
+    // extract first component of each tuple
     compute::vector<int> first_component(4, context);
     compute::transform(
         vector.begin(),
@@ -318,7 +318,7 @@ BOOST_AUTO_TEST_CASE(lambda_get_tuple)
     );
     CHECK_RANGE_EQUAL(int, 4, first_component, (1, 3, 5, 7));
 
-    // extract second compoenent of each tuple
+    // extract second component of each tuple
     compute::vector<char> second_component(4, context);
     compute::transform(
         vector.begin(),
@@ -329,7 +329,7 @@ BOOST_AUTO_TEST_CASE(lambda_get_tuple)
     );
     CHECK_RANGE_EQUAL(char, 4, second_component, ('a', 'b', 'c', 'd'));
 
-    // extract third compoenent of each tuple
+    // extract third component of each tuple
     compute::vector<float> third_component(4, context);
     compute::transform(
         vector.begin(),

From 38b75f920c7d6197c6c8769680bdac6a3d87b457 Mon Sep 17 00:00:00 2001
From: Jakub Szuppe <j.szuppe@gmail.com>
Date: Wed, 4 May 2016 19:47:16 +0200
Subject: [PATCH 5/8] Fix boost::compute::discrete_distribution constructor

---
 .../boost/compute/random/discrete_distribution.hpp  | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/include/boost/compute/random/discrete_distribution.hpp b/include/boost/compute/random/discrete_distribution.hpp
index 3707928f9..ea0411254 100644
--- a/include/boost/compute/random/discrete_distribution.hpp
+++ b/include/boost/compute/random/discrete_distribution.hpp
@@ -42,8 +42,8 @@ class discrete_distribution
     /// the range [\p first, \p last)
     template<class InputIterator>
     discrete_distribution(InputIterator first, InputIterator last)
-        : m_n(std::distance(first, last)),
-          m_probabilities(std::distance(first, last))
+        : m_n((std::max)(size_t(1), static_cast<size_t>(std::distance(first, last)))),
+          m_probabilities((std::max)(size_t(1), static_cast<size_t>(std::distance(first, last))))
     {
         double sum = 0;
 
@@ -52,9 +52,14 @@ class discrete_distribution
             sum += *iter;
         }
 
-        for(size_t i=0; i<m_n; i++)
+        InputIterator iter = first;
+        m_probabilities[0] = (*iter)/sum;
+        iter++;
+
+        for(size_t i = 1; i < m_n; i++)
         {
-            m_probabilities[i] = m_probabilities[i-1] + first[i]/sum;
+            m_probabilities[i] = m_probabilities[i-1] + (*iter)/sum;
+            iter++;
         }
     }
 

From 0fe1ab3e248d10ef5a8e6281a1e1bbf85515e97c Mon Sep 17 00:00:00 2001
From: Jakub Szuppe <j.szuppe@gmail.com>
Date: Wed, 4 May 2016 20:11:29 +0200
Subject: [PATCH 6/8] Add type checks for distributions

This commit adds asserts that check if used template
type is correct (is integral, is a floating point type).
---
 include/boost/compute/random/bernoulli_distribution.hpp   | 8 ++++++++
 include/boost/compute/random/discrete_distribution.hpp    | 8 ++++++++
 include/boost/compute/random/normal_distribution.hpp      | 8 ++++++++
 include/boost/compute/random/uniform_int_distribution.hpp | 8 ++++++++
 .../boost/compute/random/uniform_real_distribution.hpp    | 6 ++++++
 5 files changed, 38 insertions(+)

diff --git a/include/boost/compute/random/bernoulli_distribution.hpp b/include/boost/compute/random/bernoulli_distribution.hpp
index edd112509..50bf27f59 100644
--- a/include/boost/compute/random/bernoulli_distribution.hpp
+++ b/include/boost/compute/random/bernoulli_distribution.hpp
@@ -11,6 +11,9 @@
 #ifndef BOOST_COMPUTE_RANDOM_BERNOULLI_DISTRIBUTION_HPP
 #define BOOST_COMPUTE_RANDOM_BERNOULLI_DISTRIBUTION_HPP
 
+#include <boost/assert.hpp>
+#include <boost/type_traits.hpp>
+
 #include <boost/compute/command_queue.hpp>
 #include <boost/compute/function.hpp>
 #include <boost/compute/types/fundamental.hpp>
@@ -84,6 +87,11 @@ class bernoulli_distribution
 
 private:
     RealType m_p;
+
+    BOOST_STATIC_ASSERT_MSG(
+        boost::is_floating_point<RealType>::value,
+        "Template argument must be a floating point type"
+    );
 };
 
 } // end compute namespace
diff --git a/include/boost/compute/random/discrete_distribution.hpp b/include/boost/compute/random/discrete_distribution.hpp
index ea0411254..03602176d 100644
--- a/include/boost/compute/random/discrete_distribution.hpp
+++ b/include/boost/compute/random/discrete_distribution.hpp
@@ -11,6 +11,9 @@
 #ifndef BOOST_COMPUTE_RANDOM_DISCRETE_DISTRIBUTION_HPP
 #define BOOST_COMPUTE_RANDOM_DISCRETE_DISTRIBUTION_HPP
 
+#include <boost/type_traits.hpp>
+#include <boost/static_assert.hpp>
+
 #include <boost/compute/command_queue.hpp>
 #include <boost/compute/function.hpp>
 #include <boost/compute/algorithm/accumulate.hpp>
@@ -114,6 +117,11 @@ class discrete_distribution
 private:
     size_t m_n;
     ::std::vector<double> m_probabilities;
+
+    BOOST_STATIC_ASSERT_MSG(
+        boost::is_integral<IntType>::value,
+        "Template argument must be integral"
+    );
 };
 
 } // end compute namespace
diff --git a/include/boost/compute/random/normal_distribution.hpp b/include/boost/compute/random/normal_distribution.hpp
index 71dd8b9d1..4693e4fff 100644
--- a/include/boost/compute/random/normal_distribution.hpp
+++ b/include/boost/compute/random/normal_distribution.hpp
@@ -13,6 +13,9 @@
 
 #include <limits>
 
+#include <boost/assert.hpp>
+#include <boost/type_traits.hpp>
+
 #include <boost/compute/command_queue.hpp>
 #include <boost/compute/function.hpp>
 #include <boost/compute/types/fundamental.hpp>
@@ -124,6 +127,11 @@ class normal_distribution
 private:
     RealType m_mean;
     RealType m_stddev;
+
+    BOOST_STATIC_ASSERT_MSG(
+        boost::is_floating_point<RealType>::value,
+        "Template argument must be a floating point type"
+    );
 };
 
 } // end compute namespace
diff --git a/include/boost/compute/random/uniform_int_distribution.hpp b/include/boost/compute/random/uniform_int_distribution.hpp
index 92e8b3305..20448afec 100644
--- a/include/boost/compute/random/uniform_int_distribution.hpp
+++ b/include/boost/compute/random/uniform_int_distribution.hpp
@@ -13,6 +13,9 @@
 
 #include <limits>
 
+#include <boost/type_traits.hpp>
+#include <boost/static_assert.hpp>
+
 #include <boost/compute/command_queue.hpp>
 #include <boost/compute/container/vector.hpp>
 #include <boost/compute/function.hpp>
@@ -103,6 +106,11 @@ class uniform_int_distribution
 private:
     IntType m_a;
     IntType m_b;
+
+    BOOST_STATIC_ASSERT_MSG(
+        boost::is_integral<IntType>::value,
+        "Template argument must be integral"
+    );
 };
 
 } // end compute namespace
diff --git a/include/boost/compute/random/uniform_real_distribution.hpp b/include/boost/compute/random/uniform_real_distribution.hpp
index d72d18d20..75d9659d6 100644
--- a/include/boost/compute/random/uniform_real_distribution.hpp
+++ b/include/boost/compute/random/uniform_real_distribution.hpp
@@ -12,6 +12,7 @@
 #define BOOST_COMPUTE_RANDOM_UNIFORM_REAL_DISTRIBUTION_HPP
 
 #include <boost/assert.hpp>
+#include <boost/type_traits.hpp>
 
 #include <boost/compute/command_queue.hpp>
 #include <boost/compute/function.hpp>
@@ -102,6 +103,11 @@ class uniform_real_distribution
 private:
     RealType m_a;
     RealType m_b;
+
+    BOOST_STATIC_ASSERT_MSG(
+        boost::is_floating_point<RealType>::value,
+        "Template argument must be a floating point type"
+    );
 };
 
 } // end compute namespace

From 57ea7d85df7c710392ed96bf0ad9b8e6c3472b72 Mon Sep 17 00:00:00 2001
From: Jakub Szuppe <j.szuppe@gmail.com>
Date: Thu, 5 May 2016 13:23:40 +0200
Subject: [PATCH 7/8] Skipping resize_throw_exception test on Apple

From tests on Travis-CI it seems that on Apple OpenCL platform
clCreatBuffer() does not return NULL and does not set error to
CL_INVALID_BUFFER_SIZE when size of the buffer memory object is
greater than CL_DEVICE_MAX_MEM_ALLOC_SIZE.
---
 test/quirks.hpp      | 8 ++++++++
 test/test_vector.cpp | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/test/quirks.hpp b/test/quirks.hpp
index 2288540be..2c221c042 100644
--- a/test/quirks.hpp
+++ b/test/quirks.hpp
@@ -60,6 +60,14 @@ inline bool is_apple_cpu_device(const boost::compute::device &device)
     return is_apple_device(device) && (device.type() & ::boost::compute::device::cpu);
 }
 
+// On Apple devices clCreateBuffer does not return NULL and does no set error
+// to CL_INVALID_BUFFER_SIZE when size of the buffer memory object is greater
+// than CL_DEVICE_MAX_MEM_ALLOC_SIZE.
+inline bool bug_in_clcreatebuffer(const boost::compute::device &device)
+{
+    return is_apple_device(device);
+}
+
 // returns true if the device supports image samplers.
 inline bool supports_image_samplers(const boost::compute::device &device)
 {
diff --git a/test/test_vector.cpp b/test/test_vector.cpp
index 996940fc2..fca4cea25 100644
--- a/test/test_vector.cpp
+++ b/test/test_vector.cpp
@@ -22,6 +22,7 @@
 #include <boost/compute/allocator/pinned_allocator.hpp>
 #include <boost/compute/container/vector.hpp>
 
+#include "quirks.hpp"
 #include "check_macros.hpp"
 #include "context_setup.hpp"
 
@@ -332,6 +333,13 @@ BOOST_AUTO_TEST_CASE(assign_constant_value)
 
 BOOST_AUTO_TEST_CASE(resize_throw_exception)
 {
+    if(bug_in_clcreatebuffer(device)) {
+        std::cerr
+            << "skipping resize_throw_exception test on Apple platform"
+            << std::endl;
+        return;
+    }
+
     // create vector with eight items
     int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
     compute::vector<int> vec(data, data + 8, queue);

From 13e173c97e1ea9c6650b15918a041740505833b1 Mon Sep 17 00:00:00 2001
From: Jakub Szuppe <j.szuppe@gmail.com>
Date: Thu, 5 May 2016 14:35:21 +0200
Subject: [PATCH 8/8] Failures not allowed on OSX Travis-CI build

---
 .travis.yml | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index d0b20ecb9..42d2389e7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -257,24 +257,12 @@ matrix:
     ############################################################################
     # OSX
     ############################################################################
-    # OSX build with running tests
-    - os: osx
-      compiler: clang
-      env:
-        - ENV_CXX_FLAGS="-Wno-c99-extensions"
-        - RUN_TESTS=true
-    # OSX build without running tests
+
+    # OSX build
     - os: osx
       compiler: clang
       env:
         - ENV_CXX_FLAGS="-Wno-c99-extensions"
-        - RUN_TESTS=false
-
-  allow_failures:
-    - os: osx # OSX build with running tests is allowed to fail
-      env:
-        - ENV_CXX_FLAGS="-Wno-c99-extensions"
-        - RUN_TESTS=true
 
 before_install:
     # Install recent cmake