boostorg · jzmaddock · Jan 14, 2016 · Jan 15, 2016 · Jan 15, 2016 · Jan 16, 2016
diff --git a/doc/math.qbk b/doc/math.qbk
@@ -159,6 +159,7 @@ and use the function's name as the link text.]
 [def __double_factorial [link math_toolkit.factorials.sf_double_factorial double_factorial]]
 [def __rising_factorial [link math_toolkit.factorials.sf_rising_factorial rising_factorial]]
 [def __falling_factorial [link math_toolkit.factorials.sf_falling_factorial falling_factorial]]
+[def __binomial_coefficient [link math_toolkit.factorials.sf_binomial binomial_coefficient]]
 
 [/error functions]
 [def __erf [link math_toolkit.sf_erf.error_function erf]]
@@ -185,6 +186,7 @@ and use the function's name as the link text.]
 [def __ellint_rf  [link math_toolkit.ellint.ellint_carlson ellint_rf]]
 [def __ellint_rc  [link math_toolkit.ellint.ellint_carlson ellint_rc]]
 [def __ellint_rd  [link math_toolkit.ellint.ellint_carlson ellint_rd]]
+[def __ellint_rg  [link math_toolkit.ellint.ellint_carlson ellint_rg]]
 [def __ellint_1  [link math_toolkit.ellint.ellint_1 ellint_1]]
 [def __ellint_2  [link math_toolkit.ellint.ellint_2 ellint_2]]
 [def __ellint_3  [link math_toolkit.ellint.ellint_3 ellint_3]]
@@ -240,6 +242,8 @@ and use the function's name as the link text.]
 [/sinus cardinals]
 [def __sinc_pi  [link math_toolkit.sinc.sinc_pi sinc_pi]]
 [def __sinhc_pi  [link math_toolkit.sinc.sinhc_pi sinhc_pi]]
+[def __sin_pi  [link math_toolkit.powers.sin_pi sin_pi]]
+[def __cos_pi  [link math_toolkit.powers.cos_pi cos_pi]]
 
 [/Inverse hyperbolics]
 [def __acosh  [link math_toolkit.inv_hyper.acosh acosh]]
@@ -260,6 +264,9 @@ and use the function's name as the link text.]
 [def __nextafter [link math_toolkit.next_float.nextafter nextafter]]
 [def __float_advance [link math_toolkit.next_float.float_advance float_advance]]
 [def __ulp [link math_toolkit.next_float.ulp ulp]]
+[def __signbit [link math_toolkit.sign_functions signbit]]
+[def __sign [link math_toolkit.sign_functions sign]]
+[def __changesign [link math_toolkit.sign_functions changesign]]
 
 [/powers etc]
 [def __expm1 [link math_toolkit.powers.expm1 expm1]]
@@ -324,7 +331,7 @@ and use the function's name as the link text.]
 [def __algorithms [link math_toolkit.dist_ref.dist_algorithms algorithms]]
 
 [/ distribution def names end in distrib to avoid clashes]
-[def __arcsine_distrib [link math_toolkit.dist_ref.dists.arcsine_dist Arcsine Distribution]]
+[def __arcsine_distrib [link math_toolkit.dist_ref.dists.arcine_dist Arcsine Distribution]]
 [def __beta_distrib [link math_toolkit.dist_ref.dists.beta_dist Beta Distribution]]
 [def __binomial_distrib [link math_toolkit.dist_ref.dists.binomial_dist Binomial Distribution]]
 [def __cauchy_distrib [link math_toolkit.dist_ref.dists.cauchy_dist Cauchy Distribution]]
@@ -351,8 +358,11 @@ and use the function's name as the link text.]
 [def __normal_distrib [link math_toolkit.dist_ref.dists.normal_dist Normal Distribution]]
 [def __poisson_distrib [link math_toolkit.dist_ref.dists.poisson_dist Poisson Distribution]]
 [def __pareto_distrib [link math_toolkit.dist_ref.dists.pareto Pareto Distribution]]
+[def __rayleigh_distrib [link math_toolkit.dist_ref.dists.rayleigh Rayleigh Distribution]]
 [def __students_t_distrib [link math_toolkit.dist_ref.dists.students_t_dist Students t Distribution]]
 [def __skew_normal_distrib [link math_toolkit.dist_ref.dists.skew_normal_dist Skew Normal Distribution]]
+[def __triangular_distrib [link math_toolkit.dist_ref.dists.triangular_dist Triangular Distribution]]
+[def __uniform_distrib [link math_toolkit.dist_ref.dists.uniform_dist Uniform Distribution]]
 [def __weibull_distrib [link math_toolkit.dist_ref.dists.weibull_dist Weibull Distribution]]
 
 [/links to policy]
@@ -487,6 +497,7 @@ and as a CD ISBN 0-9504833-2-X  978-0-9504833-2-0, Classification 519.2-dc22.
 [include overview/structure.qbk] [/getting about, directory and file structure.]
 [include overview/result_type_calc.qbk]
 [include overview/error_handling.qbk]
+[include overview/cuda.qbk]
 
 [section:compilers_overview Compilers]
 [compilers_overview]
@@ -637,6 +648,7 @@ and as a CD ISBN 0-9504833-2-X  978-0-9504833-2-0, Classification 519.2-dc22.
 [include quadrature/gauss_kronrod.qbk]
 [include quadrature/double_exponential.qbk]
 [include quadrature/naive_monte_carlo.qbk]
+[include quadrature/cuda_naive_monte_carlo.qbk]
 [include differentiation/numerical_differentiation.qbk]
 [endmathpart]
 

diff --git a/doc/overview/cuda.qbk b/doc/overview/cuda.qbk
@@ -0,0 +1,58 @@
+[section:cuda CUDA support]
+
+This library does not (yet) use CUDA internally, however there are a number of functions which are
+marked up for use on CUDA devices.
+
+Currently nearly all of our internal helper functions, plus the mathematical __constants and the __boost_math_fp
+are marked as safe for use on CUDA devices.
+
+In addition the following special functions are all safe to use in CUDA device code:
+
+__beta, __ibeta_derivative, __binomial_coefficient, __erf, __erfc, __erf_inv, __erfc_inv,
+__ellint_rf, __ellint_rd, __ellint_rc, __ellint_rj, __ellint_rg, __ellint_1, __ellint_2, __ellint_3,
+__ellint_rf, __ellint_rd, __ellint_rc, __ellint_rj, __ellint_rg, __ellint_1, __ellint_2, __ellint_3,
+__ellint_d, __jacobi_zeta, __heuman_lambda, __factorial, __unchecked_factorial, __double_factorial,
+__falling_factorial, __rising_factorial, __tgamma, __tgamma1pm1, __lgamma, __tgamma_lower,
+__gamma_q, __gamma_p, __tgamma_delta_ratio, __tgamma_ratio, __gamma_p_derivative, __cbrt, __log1p,
+__expm1, __powm1, __sinc_pi, __sinhc_pi, __asinh, __acosh, __atanh, __sin_pi, __cos_pi,
+__fpclassify, __isnan, __isinf, __isnormal, __isfinite, __signbit, __sign, __changesign, __pow,
+__nextafter, __float_next, __float_prior, __float_distance, __float_advance.
+
+The following distribution functions are also supported:
+
+[table
+[[Distribution][pdf][cdf][qualtile]]
+[[__arcsine_distrib][Yes][Yes][Yes]]
+[[__cauchy_distrib][Yes][Yes][Yes]]
+[[__chi_squared_distrib][Yes][Yes][No]]
+[[__extreme_distrib][Yes][Yes][Yes]]
+[[__exp_distrib][Yes][Yes][Yes]]
+[[__gamma_distrib][Yes][Yes][No]]
+[[__geometric_distrib][Yes][Yes][No]]
+[[__inverse_chi_squared_distrib][Yes][Yes][No]]
+[[__inverse_gamma_distrib][Yes][Yes][No]]
+[[__inverse_gaussian_distrib][Yes][Yes][No]]
+[[__laplace_distrib][Yes][Yes][Yes]]
+[[__logistic_distrib][Yes][Yes][Yes]]
+[[__lognormal_distrib][Yes][Yes][Yes]]
+[[__normal_distrib][Yes][Yes][Yes]]
+[[__pareto_distrib][Yes][Yes][Yes]]
+[[__poisson_distrib][Yes][Yes][No]]
+[[__rayleigh_distrib][Yes][Yes][Yes]]
+[[__triangular_distrib][Yes][Yes][Yes]]
+[[__uniform_distrib][Yes][Yes][Yes]]
+[[__weibull_distrib][Yes][Yes][Yes]]
+]
+
+If you see something that's not listed, then either we haven't got round to it yet, or else
+the function is too large for a CUDA device (the incomplete beta for example) or depends on
+non-CUDA usable standard library code such as <tuple> etc.  Pull requests or issues are welcome.
+
+[endsect]
+
+[/ cuda.qbk
+  Copyright 2018 John Maddock.
+  Distributed under the Boost Software License, Version 1.0.
+  (See accompanying file LICENSE_1_0.txt or copy at
+  http://www.boost.org/LICENSE_1_0.txt).
+]
diff --git a/doc/quadrature/cuda_naive_monte_carlo.qbk b/doc/quadrature/cuda_naive_monte_carlo.qbk
@@ -0,0 +1,119 @@
+[/
+Copyright (c) 2018 Nick Thompson
+Use, modification and distribution are subject to the
+Boost Software License, Version 1.0. (See accompanying file
+LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+]
+
+
+[section:cuda_naive_monte_carlo CUDA Accelerated Naive Monte Carlo Integration]
+
+[heading Synopsis]
+
+    #include <boost/math/quadrature/cuda_naive_monte_carlo.hpp>
+    namespace boost { namespace math { namespace quadrature {
+
+    template <class Real, class F, class ThreadGen = thrust::random::taus88, class MasterGen = std::random_device>
+    struct cuda_naive_monte_carlo
+    {
+    public:
+        cuda_naive_monte_carlo(
+		   const F& integrand,
+           std::vector<std::pair<Real, Real>> const & bounds, 
+		   const MasterGen& seed);
+		cuda_naive_monte_carlo(
+		   const F& integrand, 
+		   std::vector<std::pair<Real, Real>> const & bounds)
+
+        Real integrate(
+		   Real error_request, 
+		   boost::uintmax_t calls_per_thread = 1024, 
+		   boost::uintmax_t max_calls_per_thread = 250000, 
+		   bool is_compensated = true);
+
+      Real variance() const;
+      Real current_error_estimate() const;
+      uint64_t calls() const;
+    };
+    }}} // namespaces
+
+[heading Description]
+
+The class `cuda_naive_monte_carlo` performs Monte-Carlo integration on a square integrable function /f/ on a domain [Omega].
+The theoretical background of Monte-Carlo integration is nicely discussed at [@https://en.wikipedia.org/wiki/Monte_Carlo_integration Wikipedia],
+and as such will not be discussed here.
+However, despite being "naive",
+it is a mistake to assume that naive Monte-Carlo integration is not powerful,
+as the simplicity of the method affords a robustness not easily provided by more sophisticated tools.
+The multithreaded nature of the routine allows us to compute a large number of sample points with great speed,
+and hence the slow convergence is mitigated by exploiting the full power of modern hardware.
+
+This class provides proof-of-principle CUDA-accelerated Monte-Carlo integration which will utilize all available
+CUDA threads to provide a significant performance advantage over non-CUDA code.  Since CUDA is used, only
+types `float` and `double` are supported.  Note that there are two random number generators specified in the
+template type-list: type `ThreadGen` is the per-thread random number generator, and must be a type which is
+usable on the CUDA device.  These per-thread generators are initialized via random seeds generated by an object
+of type `MasterGen`: this defaults to `std::random_device` but could equally be a psuedo-random number generator.
+It is important though to ensure that the per-thread random generators do not become correlated, or generate
+overlapping sequences.  For this reason type `ThreadGen` should have a long repeat period, and at the very least 
+be a different type from `MasterGen`.
+
+A call to member function `integrate` performs the actual integration, and also controls how the CUDA device is used:
+
+        Real integrate(
+		   Real error_request, 
+		   boost::uintmax_t calls_per_thread = 1024, 
+		   boost::uintmax_t max_calls_per_thread = 250000, 
+		   bool is_compensated = true);
+
+The parameters are as follows:
+
+* `error_request`: the desired absolute error in the result.
+* `calls_per_thread`: the number of calls to the integrand made by each thread in the first pass - 
+this first pass is used to get an estimate of the variance and calculate how many calls will
+be required by the second pass to reach the desired error bound.
+* `max_calls_per_thread`: the maximum number of calls of the integrand by each thread in any single CUDA
+invocation.  This parameter is used to prevent operating system timeouts from terminating the application.
+For example on Windows if the CUDA accelerated display driver is unresponsive for more than 2 seconds, then
+the application using it will simply be terminated.
+* `is_compensated`: when `true` each thread will use Kahan-style compensated addition to ensure accuracy.  When
+`false` then regular addition will be used for improved performance.
+
+For example:
+
+      // Define a function to integrate:
+      auto g = [] __device__ (const double* x)
+      {
+         constexpr const double pi = boost::math::constants::pi<double>();
+         constexpr const double A = 1.0 / (pi * pi * pi);
+         return A / (1.0 - cos(x[0])*cos(x[1])*cos(x[2]));
+      };
+      std::vector<std::pair<double, double>> bounds{ { 0, boost::math::constants::pi<double>() },{ 0, boost::math::constants::pi<double>() },{ 0, boost::math::constants::pi<double>() } };
+      double error_goal = 0.001;
+      cuda_naive_monte_carlo<double, decltype(g)> mc(g, bounds);
+
+      double result = mc.integrate(error_goal);
+
+      std::cout << "Integration result is: " << result << std::endl;
+
+First off, we define the function we wish to integrate.
+This function must accept a `const Real*`, and return a `Real`.
+In comparison to the regular non-CUDA code we loose a good deal of type safety
+as CUDA deals in raw pointers, not vectors - so it's up to the client to
+ensure that the number of elements accessed matches the number of dimensions passed
+to the integrators constructor.
+Also note that we've used a lambda expression for the integrand: this currently requires
+compilation with `-expt-extended-lambda`.
+
+Next, we define the domain of integration as a vector of pairs - in this case
+the domain is [0, PI] over 3 dimensions.
+
+   std::vector<std::pair<double, double>> bounds{ { 0, boost::math::constants::pi<double>() },{ 0, boost::math::constants::pi<double>() },{ 0, boost::math::constants::pi<double>() } };
+
+The call
+
+    naive_monte_carlo<double, decltype(g)> mc(g, bounds);
+
+creates an instance of the Monte-Carlo integrator, and the call to `integrate` then returns the result.
+
+[endsect]
diff --git a/doc/roots/elliptic_table_100_msvc_X86_SSE2.qbk b/doc/roots/elliptic_table_100_msvc_X86_SSE2.qbk
@@ -13,8 +13,8 @@ Compiled in optimise mode., _X86_SSE2]
 [table:elliptic root with radius 28 and arc length 300) for float, double, long double and cpp_bin_float_50 types, using _X86_SSE2
 [[][float][][][] [][double][][][] [][long d][][][] [][cpp50][][]]
 [[Algo     ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ]]
-[[TOMS748  ][  6][  906][2.07][  0][ ][  9][ 1312][1.79][  1][ ][  9][ 1281][1.75][  1][ ][ 11][1690625][1.52][ -3][ ]]
-[[Newton   ][  3][  640][1.46][ -1][ ][  4][  875][1.19][  1][ ][  4][  843][1.15][  1][ ][  5][1368750][1.23][  0][ ]]
-[[Halley   ][  2][  437][[role blue 1.00]][  0][ ][  3][  734][[role blue 1.00]][  3][ ][  3][  734][[role blue 1.00]][  3][ ][  4][1109375][[role blue 1.00]][  0][ ]]
-[[Schr'''&#xf6;'''der][  3][  671][1.54][ -1][ ][  6][ 1296][1.77][  1][ ][  6][ 1406][1.92][  1][ ][  5][1462500][1.32][ -2][ ]]
+[[TOMS748  ][  5][  656][1.36][ -1][ ][  9][ 1125][2.12][  1][ ][  9][ 1000][1.83][  1][ ][ 11][881250][1.52][ -3][ ]]
+[[Newton   ][  3][  656][1.36][ -1][ ][  4][  625][1.18][  1][ ][  4][  640][1.17][  1][ ][  5][717187][1.24][  0][ ]]
+[[Halley   ][  2][  484][[role blue 1.00]][  0][ ][  3][  531][[role blue 1.00]][  3][ ][  3][  546][[role blue 1.00]][  3][ ][  4][579687][[role blue 1.00]][  0][ ]]
+[[Schr'''&#xf6;'''der][  3][  703][1.45][ -1][ ][  6][ 1031][1.94][  1][ ][  6][ 1015][1.86][  1][ ][  5][740625][1.28][ -2][ ]]
 ] [/end of table root]
diff --git a/doc/roots/root_comparison_tables_msvc.qbk b/doc/roots/root_comparison_tables_msvc.qbk
@@ -11,9 +11,9 @@ http://www.boost.org/LICENSE_1_0.txt).
 [table:cbrt_4 Cube root(28) for float, double, long double and cpp_bin_float_50
 [[][float][][][] [][double][][][] [][long d][][][] [][cpp50][][]]
 [[Algorithm][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ]]
-[[cbrt     ][  0][78125][[role blue 1.0]][  0][ ][  0][62500][[role blue 1.0]][  1][ ][  0][93750][[role blue 1.0]][  1][ ][  0][11890625][1.1][  0][ ]]
-[[TOMS748  ][  8][468750][[role red 6.0]][ -1][ ][ 11][906250][[role red 15.]][  2][ ][ 11][906250][[role red 9.7]][  2][ ][  6][80859375][[role red 7.6]][ -2][ ]]
-[[Newton   ][  5][203125][2.6][  0][ ][  6][234375][3.8][  0][ ][  6][187500][2.0][  0][ ][  2][10640625][[role blue 1.0]][  0][ ]]
-[[Halley   ][  3][234375][3.0][  0][ ][  4][265625][[role red 4.3]][  0][ ][  4][234375][2.5][  0][ ][  2][26250000][2.5][  0][ ]]
-[[Schr'''&#xf6;'''der][  4][296875][3.8][  0][ ][  5][281250][[role red 4.5]][  0][ ][  5][234375][2.5][  0][ ][  2][32437500][3.0][  0][ ]]
+[[cbrt     ][  0][62500][[role blue 1.0]][  0][ ][  0][78125][[role blue 1.0]][  1][ ][  0][62500][[role blue 1.0]][  1][ ][  0][6375000][1.3][  0][ ]]
+[[TOMS748  ][  8][421875][[role red 6.8]][ -1][ ][ 11][734375][[role red 9.4]][  2][ ][ 11][687500][[role red 11.]][  2][ ][  6][39953125][[role red 8.2]][ -2][ ]]
+[[Newton   ][  5][171875][2.8][  0][ ][  6][171875][2.2][  0][ ][  6][140625][2.3][  0][ ][  2][4859375][[role blue 1.0]][  0][ ]]
+[[Halley   ][  3][156250][2.5][  0][ ][  4][203125][2.6][  0][ ][  4][171875][2.8][  0][ ][  2][12296875][2.5][  0][ ]]
+[[Schr'''&#xf6;'''der][  4][187500][3.0][  0][ ][  5][250000][3.2][  0][ ][  5][218750][3.5][  0][ ][  2][16406250][3.4][  0][ ]]
 ] [/end of table cbrt_4]