diff --git a/doc/math.qbk b/doc/math.qbk index 71da94bf04..2909ea3a60 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -159,6 +159,7 @@ and use the function's name as the link text.] [def __double_factorial [link math_toolkit.factorials.sf_double_factorial double_factorial]] [def __rising_factorial [link math_toolkit.factorials.sf_rising_factorial rising_factorial]] [def __falling_factorial [link math_toolkit.factorials.sf_falling_factorial falling_factorial]] +[def __binomial_coefficient [link math_toolkit.factorials.sf_binomial binomial_coefficient]] [/error functions] [def __erf [link math_toolkit.sf_erf.error_function erf]] @@ -185,6 +186,7 @@ and use the function's name as the link text.] [def __ellint_rf [link math_toolkit.ellint.ellint_carlson ellint_rf]] [def __ellint_rc [link math_toolkit.ellint.ellint_carlson ellint_rc]] [def __ellint_rd [link math_toolkit.ellint.ellint_carlson ellint_rd]] +[def __ellint_rg [link math_toolkit.ellint.ellint_carlson ellint_rg]] [def __ellint_1 [link math_toolkit.ellint.ellint_1 ellint_1]] [def __ellint_2 [link math_toolkit.ellint.ellint_2 ellint_2]] [def __ellint_3 [link math_toolkit.ellint.ellint_3 ellint_3]] @@ -240,6 +242,8 @@ and use the function's name as the link text.] [/sinus cardinals] [def __sinc_pi [link math_toolkit.sinc.sinc_pi sinc_pi]] [def __sinhc_pi [link math_toolkit.sinc.sinhc_pi sinhc_pi]] +[def __sin_pi [link math_toolkit.powers.sin_pi sin_pi]] +[def __cos_pi [link math_toolkit.powers.cos_pi cos_pi]] [/Inverse hyperbolics] [def __acosh [link math_toolkit.inv_hyper.acosh acosh]] @@ -260,6 +264,9 @@ and use the function's name as the link text.] 
[def __nextafter [link math_toolkit.next_float.nextafter nextafter]] [def __float_advance [link math_toolkit.next_float.float_advance float_advance]] [def __ulp [link math_toolkit.next_float.ulp ulp]] +[def __signbit [link math_toolkit.sign_functions signbit]] +[def __sign [link math_toolkit.sign_functions sign]] +[def __changesign [link math_toolkit.sign_functions changesign]] [/powers etc] [def __expm1 [link math_toolkit.powers.expm1 expm1]] @@ -324,7 +331,7 @@ and use the function's name as the link text.] [def __algorithms [link math_toolkit.dist_ref.dist_algorithms algorithms]] [/ distribution def names end in distrib to avoid clashes] -[def __arcsine_distrib [link math_toolkit.dist_ref.dists.arcsine_dist Arcsine Distribution]] +[def __arcsine_distrib [link math_toolkit.dist_ref.dists.arcsine_dist Arcsine Distribution]] [def __beta_distrib [link math_toolkit.dist_ref.dists.beta_dist Beta Distribution]] [def __binomial_distrib [link math_toolkit.dist_ref.dists.binomial_dist Binomial Distribution]] [def __cauchy_distrib [link math_toolkit.dist_ref.dists.cauchy_dist Cauchy Distribution]] @@ -351,8 +358,11 @@ and use the function's name as the link text.] 
[def __normal_distrib [link math_toolkit.dist_ref.dists.normal_dist Normal Distribution]] [def __poisson_distrib [link math_toolkit.dist_ref.dists.poisson_dist Poisson Distribution]] [def __pareto_distrib [link math_toolkit.dist_ref.dists.pareto Pareto Distribution]] +[def __rayleigh_distrib [link math_toolkit.dist_ref.dists.rayleigh Rayleigh Distribution]] [def __students_t_distrib [link math_toolkit.dist_ref.dists.students_t_dist Students t Distribution]] [def __skew_normal_distrib [link math_toolkit.dist_ref.dists.skew_normal_dist Skew Normal Distribution]] +[def __triangular_distrib [link math_toolkit.dist_ref.dists.triangular_dist Triangular Distribution]] +[def __uniform_distrib [link math_toolkit.dist_ref.dists.uniform_dist Uniform Distribution]] [def __weibull_distrib [link math_toolkit.dist_ref.dists.weibull_dist Weibull Distribution]] [/links to policy] @@ -487,6 +497,7 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22. [include overview/structure.qbk] [/getting about, directory and file structure.] [include overview/result_type_calc.qbk] [include overview/error_handling.qbk] +[include overview/cuda.qbk] [section:compilers_overview Compilers] [compilers_overview] @@ -637,6 +648,7 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22. [include quadrature/gauss_kronrod.qbk] [include quadrature/double_exponential.qbk] [include quadrature/naive_monte_carlo.qbk] +[include quadrature/cuda_naive_monte_carlo.qbk] [include differentiation/numerical_differentiation.qbk] [endmathpart] diff --git a/doc/overview/cuda.qbk b/doc/overview/cuda.qbk new file mode 100644 index 0000000000..c4012f97fb --- /dev/null +++ b/doc/overview/cuda.qbk @@ -0,0 +1,57 @@ +[section:cuda CUDA support] + +This library does not (yet) use CUDA internally, however there are a number of functions which are +marked up for use on CUDA devices. 
 +
+Currently nearly all of our internal helper functions, plus the mathematical __constants and the __boost_math_fp
+are marked as safe for use on CUDA devices.
+
+In addition the following special functions are all safe to use in CUDA device code:
+
+__beta, __ibeta_derivative, __binomial_coefficient, __erf, __erfc, __erf_inv, __erfc_inv,
+__ellint_rf, __ellint_rd, __ellint_rc, __ellint_rj, __ellint_rg, __ellint_1, __ellint_2, __ellint_3,
+__ellint_d, __jacobi_zeta, __heuman_lambda, __factorial, __unchecked_factorial, __double_factorial,
+__falling_factorial, __rising_factorial, __tgamma, __tgamma1pm1, __lgamma, __tgamma_lower,
+__gamma_q, __gamma_p, __tgamma_delta_ratio, __tgamma_ratio, __gamma_p_derivative, __cbrt, __log1p,
+__expm1, __powm1, __sinc_pi, __sinhc_pi, __asinh, __acosh, __atanh, __sin_pi, __cos_pi,
+__fpclassify, __isnan, __isinf, __isnormal, __isfinite, __signbit, __sign, __changesign, __pow,
+__nextafter, __float_next, __float_prior, __float_distance, __float_advance. 
+ +The following distribution functions are also supported: + +[table +[[Distribution][pdf][cdf][quantile]] +[[__arcsine_distrib][Yes][Yes][Yes]] +[[__cauchy_distrib][Yes][Yes][Yes]] +[[__chi_squared_distrib][Yes][Yes][No]] +[[__extreme_distrib][Yes][Yes][Yes]] +[[__exp_distrib][Yes][Yes][Yes]] +[[__gamma_distrib][Yes][Yes][No]] +[[__geometric_distrib][Yes][Yes][No]] +[[__inverse_chi_squared_distrib][Yes][Yes][No]] +[[__inverse_gamma_distrib][Yes][Yes][No]] +[[__inverse_gaussian_distrib][Yes][Yes][No]] +[[__laplace_distrib][Yes][Yes][Yes]] +[[__logistic_distrib][Yes][Yes][Yes]] +[[__lognormal_distrib][Yes][Yes][Yes]] +[[__normal_distrib][Yes][Yes][Yes]] +[[__pareto_distrib][Yes][Yes][Yes]] +[[__poisson_distrib][Yes][Yes][No]] +[[__rayleigh_distrib][Yes][Yes][Yes]] +[[__triangular_distrib][Yes][Yes][Yes]] +[[__uniform_distrib][Yes][Yes][Yes]] +[[__weibull_distrib][Yes][Yes][Yes]] +] + +If you see something that's not listed, then either we haven't got round to it yet, or else +the function is too large for a CUDA device (the incomplete beta for example) or depends on +non-CUDA usable standard library code. Pull requests or issues are welcome. + +[endsect] + +[/ cuda.qbk + Copyright 2018 John Maddock. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt). +] diff --git a/doc/quadrature/cuda_naive_monte_carlo.qbk b/doc/quadrature/cuda_naive_monte_carlo.qbk new file mode 100644 index 0000000000..074f1c7a7f --- /dev/null +++ b/doc/quadrature/cuda_naive_monte_carlo.qbk @@ -0,0 +1,119 @@ +[/ +Copyright (c) 2018 Nick Thompson +Use, modification and distribution are subject to the +Boost Software License, Version 1.0. 
(See accompanying file +LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + + +[section:cuda_naive_monte_carlo CUDA Accelerated Naive Monte Carlo Integration] + +[heading Synopsis] + + #include <boost/math/quadrature/cuda_naive_monte_carlo.hpp> + namespace boost { namespace math { namespace quadrature { + + template <class F, class Real, class ThreadGen, class MasterGen = std::random_device> + struct cuda_naive_monte_carlo + { + public: + cuda_naive_monte_carlo( + const F& integrand, + std::vector<std::pair<Real, Real>> const & bounds, + const MasterGen& seed); + cuda_naive_monte_carlo( + const F& integrand, + std::vector<std::pair<Real, Real>> const & bounds); + + Real integrate( + Real error_request, + boost::uintmax_t calls_per_thread = 1024, + boost::uintmax_t max_calls_per_thread = 250000, + bool is_compensated = true); + + Real variance() const; + Real current_error_estimate() const; + uint64_t calls() const; + }; + }}} // namespaces + +[heading Description] + +The class `cuda_naive_monte_carlo` performs Monte-Carlo integration on a square integrable function /f/ on a domain [Omega]. +The theoretical background of Monte-Carlo integration is nicely discussed at [@https://en.wikipedia.org/wiki/Monte_Carlo_integration Wikipedia], +and as such will not be discussed here. +However, despite being "naive", +it is a mistake to assume that naive Monte-Carlo integration is not powerful, +as the simplicity of the method affords a robustness not easily provided by more sophisticated tools. +The multithreaded nature of the routine allows us to compute a large number of sample points with great speed, +and hence the slow convergence is mitigated by exploiting the full power of modern hardware. + +This class provides proof-of-principle CUDA-accelerated Monte-Carlo integration which will utilize all available +CUDA threads to provide a significant performance advantage over non-CUDA code. Since CUDA is used, only +types `float` and `double` are supported. 
Note that there are two random number generators specified in the +template type-list: type `ThreadGen` is the per-thread random number generator, and must be a type which is +usable on the CUDA device. These per-thread generators are initialized via random seeds generated by an object +of type `MasterGen`: this defaults to `std::random_device` but could equally be a pseudo-random number generator. +It is important though to ensure that the per-thread random generators do not become correlated, or generate +overlapping sequences. For this reason type `ThreadGen` should have a long repeat period, and at the very least +be a different type from `MasterGen`. + +A call to member function `integrate` performs the actual integration, and also controls how the CUDA device is used: + + Real integrate( + Real error_request, + boost::uintmax_t calls_per_thread = 1024, + boost::uintmax_t max_calls_per_thread = 250000, + bool is_compensated = true); + +The parameters are as follows: + +* `error_request`: the desired absolute error in the result. +* `calls_per_thread`: the number of calls to the integrand made by each thread in the first pass - +this first pass is used to get an estimate of the variance and calculate how many calls will +be required by the second pass to reach the desired error bound. +* `max_calls_per_thread`: the maximum number of calls of the integrand by each thread in any single CUDA +invocation. This parameter is used to prevent operating system timeouts from terminating the application. +For example on Windows if the CUDA accelerated display driver is unresponsive for more than 2 seconds, then +the application using it will simply be terminated. +* `is_compensated`: when `true` each thread will use Kahan-style compensated addition to ensure accuracy. When +`false` then regular addition will be used for improved performance. 
+ +For example: + + // Define a function to integrate: + auto g = [] __device__ (const double* x) + { + constexpr const double pi = boost::math::constants::pi<double>(); + constexpr const double A = 1.0 / (pi * pi * pi); + return A / (1.0 - cos(x[0])*cos(x[1])*cos(x[2])); + }; + std::vector<std::pair<double, double>> bounds{ { 0, boost::math::constants::pi<double>() },{ 0, boost::math::constants::pi<double>() },{ 0, boost::math::constants::pi<double>() } }; + double error_goal = 0.001; + cuda_naive_monte_carlo mc(g, bounds); + + double result = mc.integrate(error_goal); + + std::cout << "Integration result is: " << result << std::endl; + +First off, we define the function we wish to integrate. +This function must accept a `const Real*`, and return a `Real`. +In comparison to the regular non-CUDA code we lose a good deal of type safety +as CUDA deals in raw pointers, not vectors - so it's up to the client to +ensure that the number of elements accessed matches the number of dimensions passed +to the integrator's constructor. +Also note that we've used a lambda expression for the integrand: this currently requires +compilation with `-expt-extended-lambda`. + +Next, we define the domain of integration as a vector of pairs - in this case +the domain is [0, PI] over 3 dimensions. + + std::vector<std::pair<double, double>> bounds{ { 0, boost::math::constants::pi<double>() },{ 0, boost::math::constants::pi<double>() },{ 0, boost::math::constants::pi<double>() } }; + +The call + + cuda_naive_monte_carlo mc(g, bounds); + +creates an instance of the Monte-Carlo integrator, and the call to `integrate` then returns the result. 
+ +[endsect] diff --git a/doc/roots/elliptic_table_100_msvc_X86_SSE2.qbk b/doc/roots/elliptic_table_100_msvc_X86_SSE2.qbk index a5210e7fdf..915d4e393b 100644 --- a/doc/roots/elliptic_table_100_msvc_X86_SSE2.qbk +++ b/doc/roots/elliptic_table_100_msvc_X86_SSE2.qbk @@ -13,8 +13,8 @@ Compiled in optimise mode., _X86_SSE2] [table:elliptic root with radius 28 and arc length 300) for float, double, long double and cpp_bin_float_50 types, using _X86_SSE2 [[][float][][][] [][double][][][] [][long d][][][] [][cpp50][][]] [[Algo ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ]] -[[TOMS748 ][ 6][ 906][2.07][ 0][ ][ 9][ 1312][1.79][ 1][ ][ 9][ 1281][1.75][ 1][ ][ 11][1690625][1.52][ -3][ ]] -[[Newton ][ 3][ 640][1.46][ -1][ ][ 4][ 875][1.19][ 1][ ][ 4][ 843][1.15][ 1][ ][ 5][1368750][1.23][ 0][ ]] -[[Halley ][ 2][ 437][[role blue 1.00]][ 0][ ][ 3][ 734][[role blue 1.00]][ 3][ ][ 3][ 734][[role blue 1.00]][ 3][ ][ 4][1109375][[role blue 1.00]][ 0][ ]] -[[Schr'''ö'''der][ 3][ 671][1.54][ -1][ ][ 6][ 1296][1.77][ 1][ ][ 6][ 1406][1.92][ 1][ ][ 5][1462500][1.32][ -2][ ]] +[[TOMS748 ][ 5][ 656][1.36][ -1][ ][ 9][ 1125][2.12][ 1][ ][ 9][ 1000][1.83][ 1][ ][ 11][881250][1.52][ -3][ ]] +[[Newton ][ 3][ 656][1.36][ -1][ ][ 4][ 625][1.18][ 1][ ][ 4][ 640][1.17][ 1][ ][ 5][717187][1.24][ 0][ ]] +[[Halley ][ 2][ 484][[role blue 1.00]][ 0][ ][ 3][ 531][[role blue 1.00]][ 3][ ][ 3][ 546][[role blue 1.00]][ 3][ ][ 4][579687][[role blue 1.00]][ 0][ ]] +[[Schr'''ö'''der][ 3][ 703][1.45][ -1][ ][ 6][ 1031][1.94][ 1][ ][ 6][ 1015][1.86][ 1][ ][ 5][740625][1.28][ -2][ ]] ] [/end of table root] diff --git a/doc/roots/root_comparison_tables_msvc.qbk b/doc/roots/root_comparison_tables_msvc.qbk index 9a4846b0fc..7f411f3eae 100644 --- a/doc/roots/root_comparison_tables_msvc.qbk +++ b/doc/roots/root_comparison_tables_msvc.qbk @@ -11,9 +11,9 @@ http://www.boost.org/LICENSE_1_0.txt). 
[table:cbrt_4 Cube root(28) for float, double, long double and cpp_bin_float_50 [[][float][][][] [][double][][][] [][long d][][][] [][cpp50][][]] [[Algorithm][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ]] -[[cbrt ][ 0][78125][[role blue 1.0]][ 0][ ][ 0][62500][[role blue 1.0]][ 1][ ][ 0][93750][[role blue 1.0]][ 1][ ][ 0][11890625][1.1][ 0][ ]] -[[TOMS748 ][ 8][468750][[role red 6.0]][ -1][ ][ 11][906250][[role red 15.]][ 2][ ][ 11][906250][[role red 9.7]][ 2][ ][ 6][80859375][[role red 7.6]][ -2][ ]] -[[Newton ][ 5][203125][2.6][ 0][ ][ 6][234375][3.8][ 0][ ][ 6][187500][2.0][ 0][ ][ 2][10640625][[role blue 1.0]][ 0][ ]] -[[Halley ][ 3][234375][3.0][ 0][ ][ 4][265625][[role red 4.3]][ 0][ ][ 4][234375][2.5][ 0][ ][ 2][26250000][2.5][ 0][ ]] -[[Schr'''ö'''der][ 4][296875][3.8][ 0][ ][ 5][281250][[role red 4.5]][ 0][ ][ 5][234375][2.5][ 0][ ][ 2][32437500][3.0][ 0][ ]] +[[cbrt ][ 0][62500][[role blue 1.0]][ 0][ ][ 0][78125][[role blue 1.0]][ 1][ ][ 0][62500][[role blue 1.0]][ 1][ ][ 0][6375000][1.3][ 0][ ]] +[[TOMS748 ][ 8][421875][[role red 6.8]][ -1][ ][ 11][734375][[role red 9.4]][ 2][ ][ 11][687500][[role red 11.]][ 2][ ][ 6][39953125][[role red 8.2]][ -2][ ]] +[[Newton ][ 5][171875][2.8][ 0][ ][ 6][171875][2.2][ 0][ ][ 6][140625][2.3][ 0][ ][ 2][4859375][[role blue 1.0]][ 0][ ]] +[[Halley ][ 3][156250][2.5][ 0][ ][ 4][203125][2.6][ 0][ ][ 4][171875][2.8][ 0][ ][ 2][12296875][2.5][ 0][ ]] +[[Schr'''ö'''der][ 4][187500][3.0][ 0][ ][ 5][250000][3.2][ 0][ ][ 5][218750][3.5][ 0][ ][ 2][16406250][3.4][ 0][ ]] ] [/end of table cbrt_4] diff --git a/doc/roots/roots_table_100_msvc_X86_SSE2.qbk b/doc/roots/roots_table_100_msvc_X86_SSE2.qbk index dee6062ac5..deeba0f5b2 100644 --- a/doc/roots/roots_table_100_msvc_X86_SSE2.qbk +++ b/doc/roots/roots_table_100_msvc_X86_SSE2.qbk @@ -14,24 +14,24 @@ Fraction of full accuracy 1 [table:root_5 5th root(28) for float, double, long double and cpp_bin_float_50 types, using 
_X86_SSE2 [[][float][][][] [][double][][][] [][long d][][][] [][cpp50][][]] [[Algo ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ]] -[[TOMS748 ][ 7][ 457][2.00][ 0][ ][ 11][ 860][3.54][ 1][ ][ 11][ 806][3.02][ 1][ ][ 12][226875][[role red 8.11]][ 0][ ]] -[[Newton ][ 3][ 228][[role blue 1.00]][ 0][ ][ 4][ 243][[role blue 1.00]][ -1][ ][ 4][ 298][1.12][ -1][ ][ 6][27968][[role blue 1.00]][ 0][ ]] -[[Halley ][ 2][ 250][1.10][ 0][ ][ 3][ 268][1.10][ 0][ ][ 3][ 267][[role blue 1.00]][ 0][ ][ 4][52812][1.89][ 0][ ]] -[[Schr'''ö'''der][ 2][ 256][1.12][ 0][ ][ 3][ 271][1.12][ -1][ ][ 3][ 270][[role blue 1.01]][ -1][ ][ 4][61406][2.20][ 0][ ]] +[[TOMS748 ][ 7][ 296][1.76][ 0][ ][ 11][ 576][3.16][ 1][ ][ 11][ 568][3.17][ 1][ ][ 12][104531][[role red 7.78]][ 0][ ]] +[[Newton ][ 3][ 168][[role blue 1.00]][ 0][ ][ 4][ 182][[role blue 1.00]][ -1][ ][ 4][ 179][[role blue 1.00]][ -1][ ][ 6][13437][[role blue 1.00]][ 0][ ]] +[[Halley ][ 2][ 179][1.07][ 0][ ][ 3][ 204][1.12][ 0][ ][ 3][ 212][1.18][ 0][ ][ 4][23125][1.72][ 0][ ]] +[[Schr'''ö'''der][ 2][ 178][1.06][ 0][ ][ 3][ 218][1.20][ -1][ ][ 3][ 214][1.20][ -1][ ][ 4][28437][2.12][ 0][ ]] ] [/end of table root] [table:root_7 7th root(28) for float, double, long double and cpp_bin_float_50 types, using _X86_SSE2 [[][float][][][] [][double][][][] [][long d][][][] [][cpp50][][]] [[Algo ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ]] -[[TOMS748 ][ 12][ 825][3.06][ 1][ ][ 15][ 1145][[role red 4.06]][ 2][ ][ 15][ 1159][[role red 4.17]][ 2][ ][ 14][295781][[role red 8.12]][ 0][ ]] -[[Newton ][ 5][ 270][[role blue 1.00]][ 0][ ][ 6][ 282][[role blue 1.00]][ 0][ ][ 6][ 278][[role blue 1.00]][ 0][ ][ 8][36406][[role blue 1.00]][ 0][ ]] -[[Halley ][ 4][ 303][1.12][ 0][ ][ 5][ 329][1.17][ 0][ ][ 5][ 335][1.21][ 0][ ][ 6][78281][2.15][ 0][ ]] -[[Schr'''ö'''der][ 5][ 340][1.26][ 0][ ][ 6][ 432][1.53][ 0][ ][ 6][ 367][1.32][ 0][ 
][ 7][85156][2.34][ 0][ ]] +[[TOMS748 ][ 12][ 439][2.45][ 1][ ][ 15][ 715][3.65][ 2][ ][ 15][ 729][3.84][ 2][ ][ 14][126250][[role red 7.28]][ 0][ ]] +[[Newton ][ 5][ 179][[role blue 1.00]][ 0][ ][ 6][ 196][[role blue 1.00]][ 0][ ][ 6][ 190][[role blue 1.00]][ 0][ ][ 8][17343][[role blue 1.00]][ 0][ ]] +[[Halley ][ 4][ 200][1.12][ 0][ ][ 5][ 273][1.39][ 0][ ][ 5][ 229][1.21][ 0][ ][ 6][35468][2.05][ 0][ ]] +[[Schr'''ö'''der][ 5][ 229][1.28][ 0][ ][ 6][ 314][1.60][ 0][ ][ 6][ 275][1.45][ 0][ ][ 7][49375][2.85][ 0][ ]] ] [/end of table root] [table:root_11 11th root(28) for float, double, long double and cpp_bin_float_50 types, using _X86_SSE2 [[][float][][][] [][double][][][] [][long d][][][] [][cpp50][][]] [[Algo ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ]] -[[TOMS748 ][ 12][ 714][3.16][ -2][ ][ 14][ 909][[role red 4.19]][ 2][ ][ 14][ 793][3.69][ 2][ ][ 17][211718][[role red 9.28]][ 2][ ]] -[[Newton ][ 6][ 226][[role blue 1.00]][ 0][ ][ 7][ 217][[role blue 1.00]][ 0][ ][ 7][ 215][[role blue 1.00]][ 0][ ][ 9][22812][[role blue 1.00]][ 0][ ]] -[[Halley ][ 4][ 262][1.16][ -1][ ][ 5][ 260][1.20][ 0][ ][ 5][ 260][1.21][ 0][ ][ 6][40781][1.79][ 0][ ]] -[[Schr'''ö'''der][ 6][ 332][1.47][ 0][ ][ 7][ 314][1.45][ 0][ ][ 7][ 310][1.44][ 0][ ][ 8][67187][2.95][ 0][ ]] +[[TOMS748 ][ 12][ 529][2.80][ -2][ ][ 14][ 734][3.50][ 2][ ][ 14][ 729][3.47][ 2][ ][ 17][190937][[role red 9.33]][ 2][ ]] +[[Newton ][ 6][ 189][[role blue 1.00]][ 0][ ][ 7][ 210][[role blue 1.00]][ 0][ ][ 7][ 210][[role blue 1.00]][ 0][ ][ 9][20468][[role blue 1.00]][ 0][ ]] +[[Halley ][ 4][ 204][1.08][ -1][ ][ 5][ 246][1.17][ 0][ ][ 5][ 246][1.17][ 0][ ][ 6][36562][1.79][ 0][ ]] +[[Schr'''ö'''der][ 6][ 245][1.30][ 0][ ][ 7][ 292][1.39][ 0][ ][ 7][ 292][1.39][ 0][ ][ 8][60625][2.96][ 0][ ]] ] [/end of table root] diff --git a/include/boost/math/bindings/rr.hpp b/include/boost/math/bindings/rr.hpp index 6ec79f953d..dbd5cb3673 100644 --- 
a/include/boost/math/bindings/rr.hpp +++ b/include/boost/math/bindings/rr.hpp @@ -395,8 +395,8 @@ inline RR tanh(RR a) *exp -= 1; r.value().e += 1; } - BOOST_ASSERT(r < 1); - BOOST_ASSERT(r >= 0.5); + BOOST_MATH_ASSERT(r < 1); + BOOST_MATH_ASSERT(r >= 0.5); return r; } inline RR ldexp(RR r, int exp) diff --git a/include/boost/math/complex/acos.hpp b/include/boost/math/complex/acos.hpp index a911756949..92c096c621 100644 --- a/include/boost/math/complex/acos.hpp +++ b/include/boost/math/complex/acos.hpp @@ -202,7 +202,7 @@ std::complex acos(const std::complex& z) // but we have no way to test that here, so for now just assert // on the assumption: // - BOOST_ASSERT(x == 1); + BOOST_MATH_ASSERT(x == 1); real = std::sqrt(y); imag = std::sqrt(y); } diff --git a/include/boost/math/complex/asin.hpp b/include/boost/math/complex/asin.hpp index 087c3b51ef..f1b07e869b 100644 --- a/include/boost/math/complex/asin.hpp +++ b/include/boost/math/complex/asin.hpp @@ -209,7 +209,7 @@ inline std::complex asin(const std::complex& z) // but we have no way to test that here, so for now just assert // on the assumption: // - BOOST_ASSERT(x == 1); + BOOST_MATH_ASSERT(x == 1); real = half_pi - std::sqrt(y); imag = std::sqrt(y); } diff --git a/include/boost/math/constants/constants.hpp b/include/boost/math/constants/constants.hpp index 8c5c4105d4..5daef01503 100644 --- a/include/boost/math/constants/constants.hpp +++ b/include/boost/math/constants/constants.hpp @@ -228,11 +228,11 @@ namespace boost{ namespace math constant_initializer::get_from_string >::force_instantiate();\ return get_from_string();\ }\ - static inline BOOST_CONSTEXPR T get(const mpl::int_) BOOST_NOEXCEPT\ + static inline BOOST_GPU_ENABLED BOOST_CONSTEXPR T get(const mpl::int_) BOOST_NOEXCEPT\ { return BOOST_JOIN(x, F); }\ - static inline BOOST_CONSTEXPR T get(const mpl::int_&) BOOST_NOEXCEPT\ + static inline BOOST_GPU_ENABLED BOOST_CONSTEXPR T get(const mpl::int_&) BOOST_NOEXCEPT\ { return x; }\ - static inline 
BOOST_CONSTEXPR T get(const mpl::int_&) BOOST_NOEXCEPT\ + static inline BOOST_GPU_ENABLED BOOST_CONSTEXPR T get(const mpl::int_&) BOOST_NOEXCEPT\ { return BOOST_JOIN(x, L); }\ BOOST_MATH_FLOAT128_CONSTANT_OVERLOAD(x) \ template static inline const T& get(const mpl::int_&)\ @@ -250,9 +250,9 @@ namespace boost{ namespace math \ \ /* The actual forwarding function: */ \ - template inline BOOST_CONSTEXPR typename detail::constant_return::type name(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T) BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(Policy)) BOOST_MATH_NOEXCEPT(T)\ + template inline BOOST_GPU_ENABLED BOOST_CONSTEXPR typename detail::constant_return::type name(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T) BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(Policy)) BOOST_MATH_NOEXCEPT(T)\ { return detail:: BOOST_JOIN(constant_, name)::get(typename construction_traits::type()); }\ - template inline BOOST_CONSTEXPR typename detail::constant_return::type name(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) BOOST_MATH_NOEXCEPT(T)\ + template inline BOOST_GPU_ENABLED BOOST_CONSTEXPR typename detail::constant_return::type name(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) BOOST_MATH_NOEXCEPT(T)\ { return name >(); }\ \ \ diff --git a/include/boost/math/distributions/arcsine.hpp b/include/boost/math/distributions/arcsine.hpp index 8aad5b2d0b..ac5d6e5658 100644 --- a/include/boost/math/distributions/arcsine.hpp +++ b/include/boost/math/distributions/arcsine.hpp @@ -54,7 +54,7 @@ namespace boost // Common error checking routines for arcsine distribution functions: // Duplicating for x_min and x_max provides specific error messages. 
template - inline bool check_x_min(const char* function, const RealType& x, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_x_min(const char* function, const RealType& x, RealType* result, const Policy& pol) { if (!(boost::math::isfinite)(x)) { @@ -67,7 +67,7 @@ namespace boost } // bool check_x_min template - inline bool check_x_max(const char* function, const RealType& x, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_x_max(const char* function, const RealType& x, RealType* result, const Policy& pol) { if (!(boost::math::isfinite)(x)) { @@ -81,11 +81,16 @@ namespace boost template - inline bool check_x_minmax(const char* function, const RealType& x_min, const RealType& x_max, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_x_minmax(const char* function, const RealType& x_min, const RealType& x_max, RealType* result, const Policy& pol) { // Check x_min < x_max if (x_min >= x_max) { - std::string msg = "x_max argument is %1%, but must be > x_min = " + lexical_cast(x_min) + "!"; +#ifdef __CUDA_ARCH__ + *result = policies::raise_domain_error( + function, + "x_max value %1% is out of range", x_max, pol); +#else + std::string msg = "x_max argument is %1%, but must be > x_min = " + lexical_cast(x_min) + "!"; *result = policies::raise_domain_error( function, msg.c_str(), x_max, pol); @@ -94,13 +99,14 @@ namespace boost // But would require replication of all helpers functions in /policies/error_handling.hpp for two values, // as well as two value versions of raise_error, raise_domain_error and do_format ... // so use slightly hacky lexical_cast to string instead. 
+#endif return false; } return true; } // bool check_x_minmax template - inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol) { if ((p < 0) || (p > 1) || !(boost::math::isfinite)(p)) { @@ -113,7 +119,7 @@ namespace boost } // bool check_prob template - inline bool check_x(const char* function, const RealType& x_min, const RealType& x_max, const RealType& x, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_x(const char* function, const RealType& x_min, const RealType& x_max, const RealType& x, RealType* result, const Policy& pol) { // Check x finite and x_min < x < x_max. if (!(boost::math::isfinite)(x)) { @@ -137,7 +143,7 @@ namespace boost } // bool check_x template - inline bool check_dist(const char* function, const RealType& x_min, const RealType& x_max, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_dist(const char* function, const RealType& x_min, const RealType& x_max, RealType* result, const Policy& pol) { // Check both x_min and x_max finite, and x_min < x_max. 
return check_x_min(function, x_min, result, pol) && check_x_max(function, x_max, result, pol) @@ -145,14 +151,14 @@ namespace boost } // bool check_dist template - inline bool check_dist_and_x(const char* function, const RealType& x_min, const RealType& x_max, RealType x, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_dist_and_x(const char* function, const RealType& x_min, const RealType& x_max, RealType x, RealType* result, const Policy& pol) { return check_dist(function, x_min, x_max, result, pol) && arcsine_detail::check_x(function, x_min, x_max, x, result, pol); } // bool check_dist_and_x template - inline bool check_dist_and_prob(const char* function, const RealType& x_min, const RealType& x_max, RealType p, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_dist_and_prob(const char* function, const RealType& x_min, const RealType& x_max, RealType p, RealType* result, const Policy& pol) { return check_dist(function, x_min, x_max, result, pol) && check_prob(function, p, result, pol); @@ -167,7 +173,7 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - arcsine_distribution(RealType x_min = 0, RealType x_max = 1) : m_x_min(x_min), m_x_max(x_max) + BOOST_GPU_ENABLED arcsine_distribution(RealType x_min = 0, RealType x_max = 1) : m_x_min(x_min), m_x_max(x_max) { // Default beta (alpha = beta = 0.5) is standard arcsine with x_min = 0, x_max = 1. // Generalized to allow x_min and x_max to be specified. RealType result; @@ -178,11 +184,11 @@ namespace boost &result, Policy()); } // arcsine_distribution constructor. 
// Accessor functions: - RealType x_min() const + BOOST_GPU_ENABLED RealType x_min() const { return m_x_min; } - RealType x_max() const + BOOST_GPU_ENABLED RealType x_max() const { return m_x_max; } @@ -197,21 +203,21 @@ namespace boost template - inline const std::pair range(const arcsine_distribution& dist) + inline BOOST_GPU_ENABLED const std::pair range(const arcsine_distribution& dist) { // Range of permissible values for random variable x. using boost::math::tools::max_value; return std::pair(static_cast(dist.x_min()), static_cast(dist.x_max())); } template - inline const std::pair support(const arcsine_distribution& dist) + inline BOOST_GPU_ENABLED const std::pair support(const arcsine_distribution& dist) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. return std::pair(static_cast(dist.x_min()), static_cast(dist.x_max())); } template - inline RealType mean(const arcsine_distribution& dist) + inline BOOST_GPU_ENABLED RealType mean(const arcsine_distribution& dist) { // Mean of arcsine distribution . RealType result; RealType x_min = dist.x_min(); @@ -230,7 +236,7 @@ namespace boost } // mean template - inline RealType variance(const arcsine_distribution& dist) + inline BOOST_GPU_ENABLED RealType variance(const arcsine_distribution& dist) { // Variance of standard arcsine distribution = (1-0)/8 = 0.125. RealType result; RealType x_min = dist.x_min(); @@ -248,7 +254,7 @@ namespace boost } // variance template - inline RealType mode(const arcsine_distribution& /* dist */) + inline BOOST_GPU_ENABLED RealType mode(const arcsine_distribution& /* dist */) { //There are always [*two] values for the mode, at ['x_min] and at ['x_max], default 0 and 1, // so instead we raise the exception domain_error. 
return policies::raise_domain_error( @@ -259,7 +265,7 @@ namespace boost } // mode template - inline RealType median(const arcsine_distribution& dist) + inline BOOST_GPU_ENABLED RealType median(const arcsine_distribution& dist) { // Median of arcsine distribution (a + b) / 2 == mean. RealType x_min = dist.x_min(); RealType x_max = dist.x_max(); @@ -277,7 +283,7 @@ namespace boost } template - inline RealType skewness(const arcsine_distribution& dist) + inline BOOST_GPU_ENABLED RealType skewness(const arcsine_distribution& dist) { RealType result; RealType x_min = dist.x_min(); @@ -296,7 +302,7 @@ namespace boost } // skewness template - inline RealType kurtosis_excess(const arcsine_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis_excess(const arcsine_distribution& dist) { RealType result; RealType x_min = dist.x_min(); @@ -316,7 +322,7 @@ namespace boost } // kurtosis_excess template - inline RealType kurtosis(const arcsine_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis(const arcsine_distribution& dist) { RealType result; RealType x_min = dist.x_min(); @@ -336,12 +342,12 @@ namespace boost } // kurtosis template - inline RealType pdf(const arcsine_distribution& dist, const RealType& xx) + inline BOOST_GPU_ENABLED RealType pdf(const arcsine_distribution& dist, const RealType& xx) { // Probability Density/Mass Function arcsine. BOOST_FPU_EXCEPTION_GUARD BOOST_MATH_STD_USING // For ADL of std functions. - static const char* function = "boost::math::pdf(arcsine_distribution<%1%> const&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(arcsine_distribution<%1%> const&, %1%)"; RealType lo = dist.x_min(); RealType hi = dist.x_max(); @@ -362,11 +368,11 @@ namespace boost } // pdf template - inline RealType cdf(const arcsine_distribution& dist, const RealType& x) + inline BOOST_GPU_ENABLED RealType cdf(const arcsine_distribution& dist, const RealType& x) { // Cumulative Distribution Function arcsine. 
BOOST_MATH_STD_USING // For ADL of std functions. - static const char* function = "boost::math::cdf(arcsine_distribution<%1%> const&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(arcsine_distribution<%1%> const&, %1%)"; RealType x_min = dist.x_min(); RealType x_max = dist.x_max(); @@ -395,10 +401,10 @@ namespace boost } // arcsine cdf template - inline RealType cdf(const complemented2_type, RealType>& c) + inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function arcsine. BOOST_MATH_STD_USING // For ADL of std functions. - static const char* function = "boost::math::cdf(arcsine_distribution<%1%> const&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(arcsine_distribution<%1%> const&, %1%)"; RealType x = c.param; arcsine_distribution const& dist = c.dist; @@ -431,7 +437,7 @@ namespace boost } // arcine ccdf template - inline RealType quantile(const arcsine_distribution& dist, const RealType& p) + inline BOOST_GPU_ENABLED RealType quantile(const arcsine_distribution& dist, const RealType& p) { // Quantile or Percent Point arcsine function or // Inverse Cumulative probability distribution function CDF. @@ -445,7 +451,7 @@ namespace boost using boost::math::constants::half_pi; - static const char* function = "boost::math::quantile(arcsine_distribution<%1%> const&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(arcsine_distribution<%1%> const&, %1%)"; RealType result = 0; // of argument checks: RealType x_min = dist.x_min(); @@ -475,7 +481,7 @@ namespace boost } // quantile template - inline RealType quantile(const complemented2_type, RealType>& c) + inline BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { // Complement Quantile or Percent Point arcsine function. // Return the number of expected x for a given @@ -483,7 +489,7 @@ namespace boost BOOST_MATH_STD_USING // For ADL of std functions. 
using boost::math::constants::half_pi; - static const char* function = "boost::math::quantile(arcsine_distribution<%1%> const&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(arcsine_distribution<%1%> const&, %1%)"; // Error checks: RealType q = c.param; diff --git a/include/boost/math/distributions/cauchy.hpp b/include/boost/math/distributions/cauchy.hpp index 5a3a64f0f2..74a052bf95 100644 --- a/include/boost/math/distributions/cauchy.hpp +++ b/include/boost/math/distributions/cauchy.hpp @@ -31,7 +31,7 @@ namespace detail { template -RealType cdf_imp(const cauchy_distribution& dist, const RealType& x, bool complement) +BOOST_GPU_ENABLED RealType cdf_imp(const cauchy_distribution& dist, const RealType& x, bool complement) { // // This calculates the cdf of the Cauchy distribution and/or its complement. @@ -55,7 +55,7 @@ RealType cdf_imp(const cauchy_distribution& dist, const RealTy // to get the result. // BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(cauchy<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(cauchy<%1%>&, %1%)"; RealType result = 0; RealType location = dist.location(); RealType scale = dist.scale(); @@ -67,6 +67,12 @@ RealType cdf_imp(const cauchy_distribution& dist, const RealTy { return result; } +#ifdef __CUDA_ARCH__ + if(x > tools::max_value()) + return static_cast((complement) ? 0 : 1); + if(x < -tools::max_value()) + return static_cast((complement) ? 1 : 0); +#else if(std::numeric_limits::has_infinity && x == std::numeric_limits::infinity()) { // cdf +infinity is unity. return static_cast((complement) ? 0 : 1); @@ -75,6 +81,7 @@ RealType cdf_imp(const cauchy_distribution& dist, const RealTy { // cdf -infinity is zero. return static_cast((complement) ? 
1 : 0); } +#endif if(false == detail::check_x(function, x, &result, Policy())) { // Catches x == NaN return result; @@ -89,7 +96,7 @@ RealType cdf_imp(const cauchy_distribution& dist, const RealTy } // cdf template -RealType quantile_imp( +BOOST_GPU_ENABLED RealType quantile_imp( const cauchy_distribution& dist, const RealType& p, bool complement) @@ -102,7 +109,7 @@ RealType quantile_imp( // mid-point of the distribution. This is either added or subtracted // from the location parameter depending on whether `complement` is true. // - static const char* function = "boost::math::quantile(cauchy<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(cauchy<%1%>&, %1%)"; BOOST_MATH_STD_USING // for ADL of std functions RealType result = 0; @@ -152,20 +159,20 @@ class cauchy_distribution typedef RealType value_type; typedef Policy policy_type; - cauchy_distribution(RealType l_location = 0, RealType l_scale = 1) + BOOST_GPU_ENABLED cauchy_distribution(RealType l_location = 0, RealType l_scale = 1) : m_a(l_location), m_hg(l_scale) { - static const char* function = "boost::math::cauchy_distribution<%1%>::cauchy_distribution"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cauchy_distribution<%1%>::cauchy_distribution"; RealType result; detail::check_location(function, l_location, &result, Policy()); detail::check_scale(function, l_scale, &result, Policy()); } // cauchy_distribution - RealType location()const + BOOST_GPU_ENABLED RealType location()const { return m_a; } - RealType scale()const + BOOST_GPU_ENABLED RealType scale()const { return m_hg; } @@ -178,7 +185,7 @@ class cauchy_distribution typedef cauchy_distribution cauchy; template -inline const std::pair range(const cauchy_distribution&) +inline BOOST_GPU_ENABLED const std::pair range(const cauchy_distribution&) { // Range of permissible values for random variable x. 
if (std::numeric_limits::has_infinity) { @@ -192,7 +199,7 @@ inline const std::pair range(const cauchy_distribution -inline const std::pair support(const cauchy_distribution& ) +inline BOOST_GPU_ENABLED const std::pair support(const cauchy_distribution& ) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. if (std::numeric_limits::has_infinity) @@ -207,11 +214,11 @@ inline const std::pair support(const cauchy_distribution -inline RealType pdf(const cauchy_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType pdf(const cauchy_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::pdf(cauchy<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(cauchy<%1%>&, %1%)"; RealType result = 0; RealType location = dist.location(); RealType scale = dist.scale(); @@ -244,31 +251,31 @@ inline RealType pdf(const cauchy_distribution& dist, const Rea } // pdf template -inline RealType cdf(const cauchy_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType cdf(const cauchy_distribution& dist, const RealType& x) { return detail::cdf_imp(dist, x, false); } // cdf template -inline RealType quantile(const cauchy_distribution& dist, const RealType& p) +inline BOOST_GPU_ENABLED RealType quantile(const cauchy_distribution& dist, const RealType& p) { return detail::quantile_imp(dist, p, false); } // quantile template -inline RealType cdf(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { return detail::cdf_imp(c.dist, c.param, true); } // cdf complement template -inline RealType quantile(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { return detail::quantile_imp(c.dist, c.param, true); } // quantile complement template 
-inline RealType mean(const cauchy_distribution&) +inline BOOST_GPU_ENABLED RealType mean(const cauchy_distribution&) { // There is no mean: typedef typename Policy::assert_undefined_type assert_type; BOOST_STATIC_ASSERT(assert_type::value == 0); @@ -281,7 +288,7 @@ inline RealType mean(const cauchy_distribution&) } template -inline RealType variance(const cauchy_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType variance(const cauchy_distribution& /*dist*/) { // There is no variance: typedef typename Policy::assert_undefined_type assert_type; @@ -295,18 +302,18 @@ inline RealType variance(const cauchy_distribution& /*dist*/) } template -inline RealType mode(const cauchy_distribution& dist) +inline BOOST_GPU_ENABLED RealType mode(const cauchy_distribution& dist) { return dist.location(); } template -inline RealType median(const cauchy_distribution& dist) +inline BOOST_GPU_ENABLED RealType median(const cauchy_distribution& dist) { return dist.location(); } template -inline RealType skewness(const cauchy_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType skewness(const cauchy_distribution& /*dist*/) { // There is no skewness: typedef typename Policy::assert_undefined_type assert_type; @@ -320,7 +327,7 @@ inline RealType skewness(const cauchy_distribution& /*dist*/) } template -inline RealType kurtosis(const cauchy_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType kurtosis(const cauchy_distribution& /*dist*/) { // There is no kurtosis: typedef typename Policy::assert_undefined_type assert_type; @@ -334,7 +341,7 @@ inline RealType kurtosis(const cauchy_distribution& /*dist*/) } template -inline RealType kurtosis_excess(const cauchy_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType kurtosis_excess(const cauchy_distribution& /*dist*/) { // There is no kurtosis excess: typedef typename Policy::assert_undefined_type assert_type; diff --git a/include/boost/math/distributions/chi_squared.hpp b/include/boost/math/distributions/chi_squared.hpp 
index 071c7756f4..34be912265 100644 --- a/include/boost/math/distributions/chi_squared.hpp +++ b/include/boost/math/distributions/chi_squared.hpp @@ -26,14 +26,14 @@ class chi_squared_distribution typedef RealType value_type; typedef Policy policy_type; - chi_squared_distribution(RealType i) : m_df(i) + BOOST_GPU_ENABLED chi_squared_distribution(RealType i) : m_df(i) { RealType result; detail::check_df( "boost::math::chi_squared_distribution<%1%>::chi_squared_distribution", m_df, &result, Policy()); } // chi_squared_distribution - RealType degrees_of_freedom()const + BOOST_GPU_ENABLED RealType degrees_of_freedom()const { return m_df; } @@ -86,14 +86,14 @@ inline const std::pair support(const chi_squared_distributio } template -RealType pdf(const chi_squared_distribution& dist, const RealType& chi_square) +BOOST_GPU_ENABLED RealType pdf(const chi_squared_distribution& dist, const RealType& chi_square) { BOOST_MATH_STD_USING // for ADL of std functions RealType degrees_of_freedom = dist.degrees_of_freedom(); // Error check: RealType error_result; - static const char* function = "boost::math::pdf(const chi_squared_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const chi_squared_distribution<%1%>&, %1%)"; if(false == detail::check_df( function, degrees_of_freedom, &error_result, Policy())) @@ -127,12 +127,12 @@ RealType pdf(const chi_squared_distribution& dist, const RealT } // pdf template -inline RealType cdf(const chi_squared_distribution& dist, const RealType& chi_square) +inline BOOST_GPU_ENABLED RealType cdf(const chi_squared_distribution& dist, const RealType& chi_square) { RealType degrees_of_freedom = dist.degrees_of_freedom(); // Error check: RealType error_result; - static const char* function = "boost::math::cdf(const chi_squared_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const chi_squared_distribution<%1%>&, %1%)"; if(false == detail::check_df( function, 
degrees_of_freedom, &error_result, Policy())) @@ -165,11 +165,11 @@ inline RealType quantile(const chi_squared_distribution& dist, } // quantile template -inline RealType cdf(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { RealType const& degrees_of_freedom = c.dist.degrees_of_freedom(); RealType const& chi_square = c.param; - static const char* function = "boost::math::cdf(const chi_squared_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const chi_squared_distribution<%1%>&, %1%)"; // Error check: RealType error_result; if(false == detail::check_df( @@ -203,22 +203,22 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const chi_squared_distribution& dist) +inline BOOST_GPU_ENABLED RealType mean(const chi_squared_distribution& dist) { // Mean of Chi-Squared distribution = v. return dist.degrees_of_freedom(); } // mean template -inline RealType variance(const chi_squared_distribution& dist) +inline BOOST_GPU_ENABLED RealType variance(const chi_squared_distribution& dist) { // Variance of Chi-Squared distribution = 2v. return 2 * dist.degrees_of_freedom(); } // variance template -inline RealType mode(const chi_squared_distribution& dist) +inline BOOST_GPU_ENABLED RealType mode(const chi_squared_distribution& dist) { RealType df = dist.degrees_of_freedom(); - static const char* function = "boost::math::mode(const chi_squared_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mode(const chi_squared_distribution<%1%>&)"; // Most sources only define mode for df >= 2, // but for 0 <= df <= 2, the pdf maximum actually occurs at random variate = 0; // So one could extend the definition of mode thus: @@ -253,7 +253,7 @@ inline RealType mode(const chi_squared_distribution& dist) // Now implemented via quantile(half) in derived accessors. 
template -inline RealType skewness(const chi_squared_distribution& dist) +inline BOOST_GPU_ENABLED RealType skewness(const chi_squared_distribution& dist) { BOOST_MATH_STD_USING // For ADL RealType df = dist.degrees_of_freedom(); @@ -261,14 +261,14 @@ inline RealType skewness(const chi_squared_distribution& dist) } template -inline RealType kurtosis(const chi_squared_distribution& dist) +inline BOOST_GPU_ENABLED RealType kurtosis(const chi_squared_distribution& dist) { RealType df = dist.degrees_of_freedom(); return 3 + 12 / df; } template -inline RealType kurtosis_excess(const chi_squared_distribution& dist) +inline BOOST_GPU_ENABLED RealType kurtosis_excess(const chi_squared_distribution& dist) { RealType df = dist.degrees_of_freedom(); return 12 / df; diff --git a/include/boost/math/distributions/complement.hpp b/include/boost/math/distributions/complement.hpp index 26d0d49e6d..371a44f297 100644 --- a/include/boost/math/distributions/complement.hpp +++ b/include/boost/math/distributions/complement.hpp @@ -7,6 +7,8 @@ #ifndef BOOST_STATS_COMPLEMENT_HPP #define BOOST_STATS_COMPLEMENT_HPP +#include + // // This code really defines our own tuple type. 
// It would be nice to reuse boost::math::tuple @@ -19,7 +21,7 @@ namespace boost{ namespace math{ template struct complemented2_type { - complemented2_type( + BOOST_GPU_ENABLED complemented2_type( const Dist& d, const RealType& p1) : dist(d), @@ -35,7 +37,7 @@ struct complemented2_type template struct complemented3_type { - complemented3_type( + BOOST_GPU_ENABLED complemented3_type( const Dist& d, const RealType1& p1, const RealType2& p2) @@ -53,7 +55,7 @@ struct complemented3_type template struct complemented4_type { - complemented4_type( + BOOST_GPU_ENABLED complemented4_type( const Dist& d, const RealType1& p1, const RealType2& p2, @@ -74,7 +76,7 @@ struct complemented4_type template struct complemented5_type { - complemented5_type( + BOOST_GPU_ENABLED complemented5_type( const Dist& d, const RealType1& p1, const RealType2& p2, @@ -98,7 +100,7 @@ struct complemented5_type template struct complemented6_type { - complemented6_type( + BOOST_GPU_ENABLED complemented6_type( const Dist& d, const RealType1& p1, const RealType2& p2, @@ -125,7 +127,7 @@ struct complemented6_type template struct complemented7_type { - complemented7_type( + BOOST_GPU_ENABLED complemented7_type( const Dist& d, const RealType1& p1, const RealType2& p2, @@ -153,37 +155,37 @@ struct complemented7_type }; template -inline complemented2_type complement(const Dist& d, const RealType& r) +inline BOOST_GPU_ENABLED complemented2_type complement(const Dist& d, const RealType& r) { return complemented2_type(d, r); } template -inline complemented3_type complement(const Dist& d, const RealType1& r1, const RealType2& r2) +inline BOOST_GPU_ENABLED complemented3_type complement(const Dist& d, const RealType1& r1, const RealType2& r2) { return complemented3_type(d, r1, r2); } template -inline complemented4_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3) +inline BOOST_GPU_ENABLED complemented4_type complement(const Dist& d, const RealType1& r1, const RealType2& 
r2, const RealType3& r3) { return complemented4_type(d, r1, r2, r3); } template -inline complemented5_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4) +inline BOOST_GPU_ENABLED complemented5_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4) { return complemented5_type(d, r1, r2, r3, r4); } template -inline complemented6_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5) +inline BOOST_GPU_ENABLED complemented6_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5) { return complemented6_type(d, r1, r2, r3, r4, r5); } template -inline complemented7_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5, const RealType6& r6) +inline BOOST_GPU_ENABLED complemented7_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5, const RealType6& r6) { return complemented7_type(d, r1, r2, r3, r4, r5, r6); } diff --git a/include/boost/math/distributions/detail/common_error_handling.hpp b/include/boost/math/distributions/detail/common_error_handling.hpp index 486fb0b5c8..328ad388db 100644 --- a/include/boost/math/distributions/detail/common_error_handling.hpp +++ b/include/boost/math/distributions/detail/common_error_handling.hpp @@ -23,7 +23,7 @@ namespace boost{ namespace math{ namespace detail { template -inline bool check_probability(const char* function, RealType const& prob, RealType* result, const Policy& pol) +inline BOOST_GPU_ENABLED bool check_probability(const char* function, RealType const& prob, RealType* result, const Policy& pol) { if((prob < 0) || (prob > 1) || !(boost::math::isfinite)(prob)) { @@ -36,7 +36,7 @@ 
inline bool check_probability(const char* function, RealType const& prob, RealTy } template -inline bool check_df(const char* function, RealType const& df, RealType* result, const Policy& pol) +inline BOOST_GPU_ENABLED bool check_df(const char* function, RealType const& df, RealType* result, const Policy& pol) { // df > 0 but NOT +infinity allowed. if((df <= 0) || !(boost::math::isfinite)(df)) { @@ -49,7 +49,7 @@ inline bool check_df(const char* function, RealType const& df, RealType* result, } template -inline bool check_df_gt0_to_inf(const char* function, RealType const& df, RealType* result, const Policy& pol) +inline BOOST_GPU_ENABLED bool check_df_gt0_to_inf(const char* function, RealType const& df, RealType* result, const Policy& pol) { // df > 0 or +infinity are allowed. if( (df <= 0) || (boost::math::isnan)(df) ) { // is bad df <= 0 or NaN or -infinity. @@ -63,7 +63,7 @@ inline bool check_df_gt0_to_inf(const char* function, RealType const& df, RealTy template -inline bool check_scale( +inline BOOST_GPU_ENABLED bool check_scale( const char* function, RealType scale, RealType* result, @@ -80,7 +80,7 @@ inline bool check_scale( } template -inline bool check_location( +inline BOOST_GPU_ENABLED bool check_location( const char* function, RealType location, RealType* result, @@ -97,7 +97,7 @@ inline bool check_location( } template -inline bool check_x( +inline BOOST_GPU_ENABLED bool check_x( const char* function, RealType x, RealType* result, @@ -118,7 +118,7 @@ inline bool check_x( } // bool check_x template -inline bool check_x_not_NaN( +inline BOOST_GPU_ENABLED bool check_x_not_NaN( const char* function, RealType x, RealType* result, @@ -138,7 +138,7 @@ inline bool check_x_not_NaN( } // bool check_x_not_NaN template -inline bool check_x_gt0( +inline BOOST_GPU_ENABLED bool check_x_gt0( const char* function, RealType x, RealType* result, @@ -159,7 +159,7 @@ inline bool check_x_gt0( } // bool check_x_gt0 template -inline bool check_positive_x( +inline 
BOOST_GPU_ENABLED bool check_positive_x( const char* function, RealType x, RealType* result, @@ -179,7 +179,7 @@ inline bool check_positive_x( } template -inline bool check_non_centrality( +inline BOOST_GPU_ENABLED bool check_non_centrality( const char* function, RealType ncp, RealType* result, @@ -196,7 +196,7 @@ inline bool check_non_centrality( } template -inline bool check_finite( +inline BOOST_GPU_ENABLED bool check_finite( const char* function, RealType x, RealType* result, diff --git a/include/boost/math/distributions/detail/derived_accessors.hpp b/include/boost/math/distributions/detail/derived_accessors.hpp index 00f5a93258..f9620677e8 100644 --- a/include/boost/math/distributions/detail/derived_accessors.hpp +++ b/include/boost/math/distributions/detail/derived_accessors.hpp @@ -39,24 +39,24 @@ namespace boost{ namespace math{ template -typename Distribution::value_type variance(const Distribution& dist); +BOOST_GPU_ENABLED typename Distribution::value_type variance(const Distribution& dist); template -inline typename Distribution::value_type standard_deviation(const Distribution& dist) +inline BOOST_GPU_ENABLED typename Distribution::value_type standard_deviation(const Distribution& dist) { BOOST_MATH_STD_USING // ADL of sqrt. 
return sqrt(variance(dist)); } template -inline typename Distribution::value_type variance(const Distribution& dist) +inline BOOST_GPU_ENABLED typename Distribution::value_type variance(const Distribution& dist) { typename Distribution::value_type result = standard_deviation(dist); return result * result; } template -inline typename Distribution::value_type hazard(const Distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED typename Distribution::value_type hazard(const Distribution& dist, const RealType& x) { // hazard function // http://www.itl.nist.gov/div898/handbook/eda/section3/eda362.htm#HAZ typedef typename Distribution::value_type value_type; @@ -75,7 +75,7 @@ inline typename Distribution::value_type hazard(const Distribution& dist, const } template -inline typename Distribution::value_type chf(const Distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED typename Distribution::value_type chf(const Distribution& dist, const RealType& x) { // cumulative hazard function. 
// http://www.itl.nist.gov/div898/handbook/eda/section3/eda362.htm#HAZ BOOST_MATH_STD_USING @@ -83,7 +83,7 @@ inline typename Distribution::value_type chf(const Distribution& dist, const Rea } template -inline typename Distribution::value_type coefficient_of_variation(const Distribution& dist) +inline BOOST_GPU_ENABLED typename Distribution::value_type coefficient_of_variation(const Distribution& dist) { typedef typename Distribution::value_type value_type; typedef typename Distribution::policy_type policy_type; @@ -104,19 +104,19 @@ inline typename Distribution::value_type coefficient_of_variation(const Distribu // implementation with all arguments of the same type: // template -inline typename Distribution::value_type pdf(const Distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED typename Distribution::value_type pdf(const Distribution& dist, const RealType& x) { typedef typename Distribution::value_type value_type; return pdf(dist, static_cast(x)); } template -inline typename Distribution::value_type cdf(const Distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED typename Distribution::value_type cdf(const Distribution& dist, const RealType& x) { typedef typename Distribution::value_type value_type; return cdf(dist, static_cast(x)); } template -inline typename Distribution::value_type quantile(const Distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED typename Distribution::value_type quantile(const Distribution& dist, const RealType& x) { typedef typename Distribution::value_type value_type; return quantile(dist, static_cast(x)); @@ -130,21 +130,21 @@ inline typename Distribution::value_type chf(const Distribution& dist, const Rea } */ template -inline typename Distribution::value_type cdf(const complemented2_type& c) +inline BOOST_GPU_ENABLED typename Distribution::value_type cdf(const complemented2_type& c) { typedef typename Distribution::value_type value_type; return cdf(complement(c.dist, static_cast(c.param))); } 
template -inline typename Distribution::value_type quantile(const complemented2_type& c) +inline BOOST_GPU_ENABLED typename Distribution::value_type quantile(const complemented2_type& c) { typedef typename Distribution::value_type value_type; return quantile(complement(c.dist, static_cast(c.param))); } template -inline typename Dist::value_type median(const Dist& d) +inline BOOST_GPU_ENABLED typename Dist::value_type median(const Dist& d) { // median - default definition for those distributions for which a // simple closed form is not known, // and for which a domain_error and/or NaN generating function is NOT defined. diff --git a/include/boost/math/distributions/detail/hypergeometric_pdf.hpp b/include/boost/math/distributions/detail/hypergeometric_pdf.hpp index 4364266514..2e4e6c7042 100644 --- a/include/boost/math/distributions/detail/hypergeometric_pdf.hpp +++ b/include/boost/math/distributions/detail/hypergeometric_pdf.hpp @@ -392,7 +392,7 @@ template T hypergeometric_pdf_factorial_imp(unsigned x, unsigned r, unsigned n, unsigned N, const Policy&) { BOOST_MATH_STD_USING - BOOST_ASSERT(N <= boost::math::max_factorial::value); + BOOST_MATH_ASSERT(N <= boost::math::max_factorial::value); T result = boost::math::unchecked_factorial(n); T num[3] = { boost::math::unchecked_factorial(r), diff --git a/include/boost/math/distributions/exponential.hpp b/include/boost/math/distributions/exponential.hpp index 05c49374ed..a50d5e1a50 100644 --- a/include/boost/math/distributions/exponential.hpp +++ b/include/boost/math/distributions/exponential.hpp @@ -29,7 +29,7 @@ namespace detail{ // Error check: // template -inline bool verify_lambda(const char* function, RealType l, RealType* presult, const Policy& pol) +inline BOOST_GPU_ENABLED bool verify_lambda(const char* function, RealType l, RealType* presult, const Policy& pol) { if((l <= 0) || !(boost::math::isfinite)(l)) { @@ -42,7 +42,7 @@ inline bool verify_lambda(const char* function, RealType l, RealType* presult, c } 
template -inline bool verify_exp_x(const char* function, RealType x, RealType* presult, const Policy& pol) +inline BOOST_GPU_ENABLED bool verify_exp_x(const char* function, RealType x, RealType* presult, const Policy& pol) { if((x < 0) || (boost::math::isnan)(x)) { @@ -63,14 +63,14 @@ class exponential_distribution typedef RealType value_type; typedef Policy policy_type; - exponential_distribution(RealType l_lambda = 1) + BOOST_GPU_ENABLED exponential_distribution(RealType l_lambda = 1) : m_lambda(l_lambda) { RealType err; detail::verify_lambda("boost::math::exponential_distribution<%1%>::exponential_distribution", l_lambda, &err, Policy()); } // exponential_distribution - RealType lambda()const { return m_lambda; } + BOOST_GPU_ENABLED RealType lambda()const { return m_lambda; } private: RealType m_lambda; @@ -103,11 +103,11 @@ inline const std::pair support(const exponential_distributio } template -inline RealType pdf(const exponential_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType pdf(const exponential_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::pdf(const exponential_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const exponential_distribution<%1%>&, %1%)"; RealType lambda = dist.lambda(); RealType result = 0; @@ -123,11 +123,11 @@ inline RealType pdf(const exponential_distribution& dist, cons } // pdf template -inline RealType cdf(const exponential_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType cdf(const exponential_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const exponential_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const exponential_distribution<%1%>&, %1%)"; RealType result = 0; RealType lambda = dist.lambda(); @@ -141,11 
+141,11 @@ inline RealType cdf(const exponential_distribution& dist, cons } // cdf template -inline RealType quantile(const exponential_distribution& dist, const RealType& p) +inline BOOST_GPU_ENABLED RealType quantile(const exponential_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const exponential_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const exponential_distribution<%1%>&, %1%)"; RealType result = 0; RealType lambda = dist.lambda(); @@ -164,11 +164,11 @@ inline RealType quantile(const exponential_distribution& dist, } // quantile template -inline RealType cdf(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const exponential_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const exponential_distribution<%1%>&, %1%)"; RealType result = 0; RealType lambda = c.dist.lambda(); @@ -185,11 +185,11 @@ inline RealType cdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const exponential_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const exponential_distribution<%1%>&, %1%)"; RealType result = 0; RealType lambda = c.dist.lambda(); @@ -210,7 +210,7 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const exponential_distribution& dist) +inline BOOST_GPU_ENABLED RealType mean(const exponential_distribution& dist) { RealType result = 0; RealType lambda = dist.lambda(); @@ -220,7 +220,7 @@ inline RealType 
mean(const exponential_distribution& dist) } template -inline RealType standard_deviation(const exponential_distribution& dist) +inline BOOST_GPU_ENABLED RealType standard_deviation(const exponential_distribution& dist) { RealType result = 0; RealType lambda = dist.lambda(); @@ -230,32 +230,32 @@ inline RealType standard_deviation(const exponential_distribution -inline RealType mode(const exponential_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType mode(const exponential_distribution& /*dist*/) { return 0; } template -inline RealType median(const exponential_distribution& dist) +inline BOOST_GPU_ENABLED RealType median(const exponential_distribution& dist) { using boost::math::constants::ln_two; return ln_two() / dist.lambda(); // ln(2) / lambda } template -inline RealType skewness(const exponential_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType skewness(const exponential_distribution& /*dist*/) { return 2; } template -inline RealType kurtosis(const exponential_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType kurtosis(const exponential_distribution& /*dist*/) { return 9; } template -inline RealType kurtosis_excess(const exponential_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType kurtosis_excess(const exponential_distribution& /*dist*/) { return 6; } diff --git a/include/boost/math/distributions/extreme_value.hpp b/include/boost/math/distributions/extreme_value.hpp index cb86de6612..cfc3928970 100644 --- a/include/boost/math/distributions/extreme_value.hpp +++ b/include/boost/math/distributions/extreme_value.hpp @@ -35,7 +35,7 @@ namespace detail{ // Error check: // template -inline bool verify_scale_b(const char* function, RealType b, RealType* presult, const Policy& pol) +inline BOOST_GPU_ENABLED bool verify_scale_b(const char* function, RealType b, RealType* presult, const Policy& pol) { if((b <= 0) || !(boost::math::isfinite)(b)) { @@ -56,7 +56,7 @@ class extreme_value_distribution typedef RealType value_type; typedef 
Policy policy_type; - extreme_value_distribution(RealType a = 0, RealType b = 1) + BOOST_GPU_ENABLED extreme_value_distribution(RealType a = 0, RealType b = 1) : m_a(a), m_b(b) { RealType err; @@ -64,8 +64,8 @@ class extreme_value_distribution detail::check_finite("boost::math::extreme_value_distribution<%1%>::extreme_value_distribution", a, &err, Policy()); } // extreme_value_distribution - RealType location()const { return m_a; } - RealType scale()const { return m_b; } + BOOST_GPU_ENABLED RealType location()const { return m_a; } + BOOST_GPU_ENABLED RealType scale()const { return m_b; } private: RealType m_a, m_b; @@ -91,11 +91,11 @@ inline const std::pair support(const extreme_value_distribut } template -inline RealType pdf(const extreme_value_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType pdf(const extreme_value_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::pdf(const extreme_value_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const extreme_value_distribution<%1%>&, %1%)"; RealType a = dist.location(); RealType b = dist.scale(); @@ -116,11 +116,11 @@ inline RealType pdf(const extreme_value_distribution& dist, co } // pdf template -inline RealType cdf(const extreme_value_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType cdf(const extreme_value_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const extreme_value_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const extreme_value_distribution<%1%>&, %1%)"; if((boost::math::isinf)(x)) return x < 0 ? 
0.0f : 1.0f; @@ -142,11 +142,11 @@ inline RealType cdf(const extreme_value_distribution& dist, co } // cdf template -RealType quantile(const extreme_value_distribution& dist, const RealType& p) +BOOST_GPU_ENABLED RealType quantile(const extreme_value_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const extreme_value_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const extreme_value_distribution<%1%>&, %1%)"; RealType a = dist.location(); RealType b = dist.scale(); @@ -169,11 +169,11 @@ RealType quantile(const extreme_value_distribution& dist, cons } // quantile template -inline RealType cdf(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const extreme_value_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const extreme_value_distribution<%1%>&, %1%)"; if((boost::math::isinf)(c.param)) return c.param < 0 ? 
1.0f : 0.0f; @@ -193,11 +193,11 @@ inline RealType cdf(const complemented2_type -RealType quantile(const complemented2_type, RealType>& c) +BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const extreme_value_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const extreme_value_distribution<%1%>&, %1%)"; RealType a = c.dist.location(); RealType b = c.dist.scale(); @@ -221,7 +221,7 @@ RealType quantile(const complemented2_type -inline RealType mean(const extreme_value_distribution& dist) +inline BOOST_GPU_ENABLED RealType mean(const extreme_value_distribution& dist) { RealType a = dist.location(); RealType b = dist.scale(); @@ -234,7 +234,7 @@ inline RealType mean(const extreme_value_distribution& dist) } template -inline RealType standard_deviation(const extreme_value_distribution& dist) +inline BOOST_GPU_ENABLED RealType standard_deviation(const extreme_value_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions. 
@@ -248,20 +248,20 @@ inline RealType standard_deviation(const extreme_value_distribution -inline RealType mode(const extreme_value_distribution& dist) +inline BOOST_GPU_ENABLED RealType mode(const extreme_value_distribution& dist) { return dist.location(); } template -inline RealType median(const extreme_value_distribution& dist) +inline BOOST_GPU_ENABLED RealType median(const extreme_value_distribution& dist) { using constants::ln_ln_two; return dist.location() - dist.scale() * ln_ln_two(); } template -inline RealType skewness(const extreme_value_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType skewness(const extreme_value_distribution& /*dist*/) { // // This is 12 * sqrt(6) * zeta(3) / pi^3: @@ -271,14 +271,14 @@ inline RealType skewness(const extreme_value_distribution& /*d } template -inline RealType kurtosis(const extreme_value_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType kurtosis(const extreme_value_distribution& /*dist*/) { // See http://mathworld.wolfram.com/ExtremeValueDistribution.html return RealType(27) / 5; } template -inline RealType kurtosis_excess(const extreme_value_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType kurtosis_excess(const extreme_value_distribution& /*dist*/) { // See http://mathworld.wolfram.com/ExtremeValueDistribution.html return RealType(12) / 5; diff --git a/include/boost/math/distributions/gamma.hpp b/include/boost/math/distributions/gamma.hpp index 9a9e2a4f52..28c87b2795 100644 --- a/include/boost/math/distributions/gamma.hpp +++ b/include/boost/math/distributions/gamma.hpp @@ -23,7 +23,7 @@ namespace detail { template -inline bool check_gamma_shape( +inline BOOST_GPU_ENABLED bool check_gamma_shape( const char* function, RealType shape, RealType* result, const Policy& pol) @@ -39,7 +39,7 @@ inline bool check_gamma_shape( } template -inline bool check_gamma_x( +inline BOOST_GPU_ENABLED bool check_gamma_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -55,7 
+55,7 @@ inline bool check_gamma_x( } template -inline bool check_gamma( +inline BOOST_GPU_ENABLED bool check_gamma( const char* function, RealType scale, RealType shape, @@ -73,19 +73,19 @@ class gamma_distribution typedef RealType value_type; typedef Policy policy_type; - gamma_distribution(RealType l_shape, RealType l_scale = 1) + BOOST_GPU_ENABLED gamma_distribution(RealType l_shape, RealType l_scale = 1) : m_shape(l_shape), m_scale(l_scale) { RealType result; detail::check_gamma("boost::math::gamma_distribution<%1%>::gamma_distribution", l_scale, l_shape, &result, Policy()); } - RealType shape()const + BOOST_GPU_ENABLED RealType shape()const { return m_shape; } - RealType scale()const + BOOST_GPU_ENABLED RealType scale()const { return m_scale; } @@ -116,11 +116,11 @@ inline const std::pair support(const gamma_distribution -inline RealType pdf(const gamma_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType pdf(const gamma_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::pdf(const gamma_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const gamma_distribution<%1%>&, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -140,11 +140,11 @@ inline RealType pdf(const gamma_distribution& dist, const Real } // pdf template -inline RealType cdf(const gamma_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType cdf(const gamma_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const gamma_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const gamma_distribution<%1%>&, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -164,7 +164,7 @@ inline RealType quantile(const gamma_distribution& dist, const { BOOST_MATH_STD_USING // for ADL of 
std functions - static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -184,11 +184,11 @@ inline RealType quantile(const gamma_distribution& dist, const } template -inline RealType cdf(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)"; RealType shape = c.dist.shape(); RealType scale = c.dist.scale(); @@ -209,7 +209,7 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const gamma_distribution& dist) +inline BOOST_GPU_ENABLED RealType mean(const gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::mean(const gamma_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mean(const gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -248,11 +248,11 @@ inline RealType mean(const gamma_distribution& dist) } template -inline RealType variance(const gamma_distribution& dist) +inline BOOST_GPU_ENABLED RealType variance(const gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::variance(const gamma_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::variance(const gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -266,11 +266,11 @@ inline RealType variance(const gamma_distribution& dist) } template -inline RealType mode(const gamma_distribution& dist) 
+inline BOOST_GPU_ENABLED RealType mode(const gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::mode(const gamma_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mode(const gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -295,11 +295,11 @@ inline RealType mode(const gamma_distribution& dist) //} template -inline RealType skewness(const gamma_distribution& dist) +inline BOOST_GPU_ENABLED RealType skewness(const gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::skewness(const gamma_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::skewness(const gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -313,11 +313,11 @@ inline RealType skewness(const gamma_distribution& dist) } template -inline RealType kurtosis_excess(const gamma_distribution& dist) +inline BOOST_GPU_ENABLED RealType kurtosis_excess(const gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::kurtosis_excess(const gamma_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::kurtosis_excess(const gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -331,7 +331,7 @@ inline RealType kurtosis_excess(const gamma_distribution& dist } template -inline RealType kurtosis(const gamma_distribution& dist) +inline BOOST_GPU_ENABLED RealType kurtosis(const gamma_distribution& dist) { return kurtosis_excess(dist) + 3; } diff --git a/include/boost/math/distributions/geometric.hpp b/include/boost/math/distributions/geometric.hpp index 6c9713eadd..eae2575246 100644 --- a/include/boost/math/distributions/geometric.hpp +++ b/include/boost/math/distributions/geometric.hpp @@ -67,7 +67,7 @@ 
namespace boost { // Common error checking routines for geometric distribution function: template - inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol) { if( !(boost::math::isfinite)(p) || (p < 0) || (p > 1) ) { @@ -80,13 +80,13 @@ namespace boost } template - inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& pol) { return check_success_fraction(function, p, result, pol); } template - inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol) { if(check_dist(function, p, result, pol) == false) { @@ -103,7 +103,7 @@ namespace boost } // Check_dist_and_k template - inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& pol) { if((check_dist(function, p, result, pol) && detail::check_probability(function, prob, result, pol)) == false) { @@ -120,7 +120,7 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - geometric_distribution(RealType p) : m_p(p) + BOOST_GPU_ENABLED geometric_distribution(RealType p) : m_p(p) { // Constructor stores success_fraction p. RealType result; geometric_detail::check_dist( @@ -130,11 +130,11 @@ namespace boost } // geometric_distribution constructor. // Private data getter class member functions. 
- RealType success_fraction() const + BOOST_GPU_ENABLED RealType success_fraction() const { // Probability of success as fraction in range 0 to 1. return m_p; } - RealType successes() const + BOOST_GPU_ENABLED RealType successes() const { // Total number of successes r = 1 (for compatibility with negative binomial?). return 1; } @@ -145,7 +145,7 @@ namespace boost RealType trials, RealType alpha) // alpha 0.05 equivalent to 95% for one-sided test. { - static const char* function = "boost::math::geometric<%1%>::find_lower_bound_on_p"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::geometric<%1%>::find_lower_bound_on_p"; RealType result = 0; // of error checks. RealType successes = 1; RealType failures = trials - successes; @@ -170,7 +170,7 @@ namespace boost RealType trials, RealType alpha) // alpha 0.05 equivalent to 95% for one-sided test. { - static const char* function = "boost::math::geometric<%1%>::find_upper_bound_on_p"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::geometric<%1%>::find_upper_bound_on_p"; RealType result = 0; // of error checks. RealType successes = 1; RealType failures = trials - successes; @@ -203,7 +203,7 @@ namespace boost RealType p, // success fraction 0 <= p <= 1. RealType alpha) // risk level threshold 0 <= alpha <= 1. { - static const char* function = "boost::math::geometric<%1%>::find_minimum_number_of_trials"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::geometric<%1%>::find_minimum_number_of_trials"; // Error checks: RealType result = 0; if(false == geometric_detail::check_dist_and_k( @@ -221,7 +221,7 @@ namespace boost RealType p, // success fraction 0 <= p <= 1. RealType alpha) // risk level threshold 0 <= alpha <= 1. 
{ - static const char* function = "boost::math::geometric<%1%>::find_maximum_number_of_trials"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::geometric<%1%>::find_maximum_number_of_trials"; // Error checks: RealType result = 0; if(false == geometric_detail::check_dist_and_k( @@ -257,7 +257,7 @@ namespace boost } template - inline RealType mean(const geometric_distribution& dist) + inline BOOST_GPU_ENABLED RealType mean(const geometric_distribution& dist) { // Mean of geometric distribution = (1-p)/p. return (1 - dist.success_fraction() ) / dist.success_fraction(); } // mean @@ -265,21 +265,21 @@ namespace boost // median implemented via quantile(half) in derived accessors. template - inline RealType mode(const geometric_distribution&) + inline BOOST_GPU_ENABLED RealType mode(const geometric_distribution&) { // Mode of geometric distribution = zero. BOOST_MATH_STD_USING // ADL of std functions. return 0; } // mode template - inline RealType variance(const geometric_distribution& dist) + inline BOOST_GPU_ENABLED RealType variance(const geometric_distribution& dist) { // Variance of Binomial distribution = (1-p) / p^2. return (1 - dist.success_fraction()) / (dist.success_fraction() * dist.success_fraction()); } // variance template - inline RealType skewness(const geometric_distribution& dist) + inline BOOST_GPU_ENABLED RealType skewness(const geometric_distribution& dist) { // skewness of geometric distribution = 2-p / (sqrt(r(1-p)) BOOST_MATH_STD_USING // ADL of std functions. 
RealType p = dist.success_fraction(); @@ -287,7 +287,7 @@ namespace boost } // skewness template - inline RealType kurtosis(const geometric_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis(const geometric_distribution& dist) { // kurtosis of geometric distribution // http://en.wikipedia.org/wiki/geometric is kurtosis_excess so add 3 RealType p = dist.success_fraction(); @@ -295,7 +295,7 @@ namespace boost } // kurtosis template - inline RealType kurtosis_excess(const geometric_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis_excess(const geometric_distribution& dist) { // kurtosis excess of geometric distribution // http://mathworld.wolfram.com/Kurtosis.html table of kurtosis_excess RealType p = dist.success_fraction(); @@ -310,11 +310,11 @@ namespace boost // chf of geometric distribution provided by derived accessors. template - inline RealType pdf(const geometric_distribution& dist, const RealType& k) + inline BOOST_GPU_ENABLED RealType pdf(const geometric_distribution& dist, const RealType& k) { // Probability Density/Mass Function. BOOST_FPU_EXCEPTION_GUARD BOOST_MATH_STD_USING // For ADL of math functions. - static const char* function = "boost::math::pdf(const geometric_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const geometric_distribution<%1%>&, %1%)"; RealType p = dist.success_fraction(); RealType result = 0; @@ -348,9 +348,9 @@ namespace boost } // geometric_pdf template - inline RealType cdf(const geometric_distribution& dist, const RealType& k) + inline BOOST_GPU_ENABLED RealType cdf(const geometric_distribution& dist, const RealType& k) { // Cumulative Distribution Function of geometric. - static const char* function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)"; // k argument may be integral, signed, or unsigned, or floating point. 
// If necessary, it has already been promoted from an integral type. @@ -379,10 +379,10 @@ namespace boost } // cdf Cumulative Distribution Function geometric. template - inline RealType cdf(const complemented2_type, RealType>& c) + inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function geometric. BOOST_MATH_STD_USING - static const char* function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)"; // k argument may be integral, signed, or unsigned, or floating point. // If necessary, it has already been promoted from an integral type. RealType const& k = c.param; @@ -404,14 +404,14 @@ namespace boost } // cdf Complemented Cumulative Distribution Function geometric. template - inline RealType quantile(const geometric_distribution& dist, const RealType& x) + inline BOOST_GPU_ENABLED RealType quantile(const geometric_distribution& dist, const RealType& x) { // Quantile, percentile/100 or Percent Point geometric function. // Return the number of expected failures k for a given probability p. // Inverse cumulative Distribution Function or Quantile (percentile / 100) of geometric Probability. // k argument may be integral, signed, or unsigned, or floating point. - static const char* function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)"; BOOST_MATH_STD_USING // ADL of std functions. RealType success_fraction = dist.success_fraction(); @@ -455,11 +455,11 @@ namespace boost } // RealType quantile(const geometric_distribution dist, p) template - inline RealType quantile(const complemented2_type, RealType>& c) + inline BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { // Quantile or Percent Point Binomial function. 
// Return the number of expected failures k for a given // complement of the probability Q = 1 - P. - static const char* function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)"; BOOST_MATH_STD_USING // Error checks: RealType x = c.param; diff --git a/include/boost/math/distributions/inverse_chi_squared.hpp b/include/boost/math/distributions/inverse_chi_squared.hpp index c1e54905da..a2b58b2801 100644 --- a/include/boost/math/distributions/inverse_chi_squared.hpp +++ b/include/boost/math/distributions/inverse_chi_squared.hpp @@ -31,7 +31,7 @@ namespace boost{ namespace math{ namespace detail { template - inline bool check_inverse_chi_squared( // Check both distribution parameters. + inline BOOST_GPU_ENABLED bool check_inverse_chi_squared( // Check both distribution parameters. const char* function, RealType degrees_of_freedom, // degrees_of_freedom (aka nu). RealType scale, // scale (aka sigma^2) @@ -51,7 +51,7 @@ class inverse_chi_squared_distribution typedef RealType value_type; typedef Policy policy_type; - inverse_chi_squared_distribution(RealType df, RealType l_scale) : m_df(df), m_scale (l_scale) + BOOST_GPU_ENABLED inverse_chi_squared_distribution(RealType df, RealType l_scale) : m_df(df), m_scale (l_scale) { RealType result; detail::check_df( @@ -62,7 +62,7 @@ class inverse_chi_squared_distribution m_scale, &result, Policy()); } // inverse_chi_squared_distribution constructor - inverse_chi_squared_distribution(RealType df = 1) : m_df(df) + BOOST_GPU_ENABLED inverse_chi_squared_distribution(RealType df = 1) : m_df(df) { RealType result; m_scale = 1 / m_df ; // Default scale = 1 / degrees of freedom (Wikipedia definition 1). 
@@ -71,11 +71,11 @@ class inverse_chi_squared_distribution m_df, &result, Policy()); } // inverse_chi_squared_distribution - RealType degrees_of_freedom()const + BOOST_GPU_ENABLED RealType degrees_of_freedom()const { return m_df; // aka nu } - RealType scale()const + BOOST_GPU_ENABLED RealType scale()const { return m_scale; // aka xi } @@ -112,14 +112,14 @@ inline const std::pair support(const inverse_chi_squared_dis } template -RealType pdf(const inverse_chi_squared_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType pdf(const inverse_chi_squared_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions. RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); RealType error_result; - static const char* function = "boost::math::pdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; if(false == detail::check_inverse_chi_squared (function, df, scale, &error_result, Policy()) @@ -152,9 +152,9 @@ RealType pdf(const inverse_chi_squared_distribution& dist, con } // pdf template -inline RealType cdf(const inverse_chi_squared_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType cdf(const inverse_chi_squared_distribution& dist, const RealType& x) { - static const char* function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); RealType error_result; @@ -187,7 +187,7 @@ inline RealType quantile(const inverse_chi_squared_distribution -inline RealType cdf(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { using boost::math::gamma_q_inv; RealType const& df = c.dist.degrees_of_freedom(); 
RealType const& scale = c.dist.scale(); RealType const& x = c.param; - static const char* function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; // Error check: RealType error_result; if(false == detail::check_df( @@ -251,7 +251,7 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const inverse_chi_squared_distribution& dist) +inline BOOST_GPU_ENABLED RealType mean(const inverse_chi_squared_distribution& dist) { // Mean of inverse Chi-Squared distribution. RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); - static const char* function = "boost::math::mean(const inverse_chi_squared_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mean(const inverse_chi_squared_distribution<%1%>&)"; if(df <= 2) return policies::raise_domain_error( function, @@ -288,11 +288,11 @@ inline RealType mean(const inverse_chi_squared_distribution& d } // mean template -inline RealType variance(const inverse_chi_squared_distribution& dist) +inline BOOST_GPU_ENABLED RealType variance(const inverse_chi_squared_distribution& dist) { // Variance of inverse Chi-Squared distribution. RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); - static const char* function = "boost::math::variance(const inverse_chi_squared_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::variance(const inverse_chi_squared_distribution<%1%>&)"; if(df <= 4) { return policies::raise_domain_error( @@ -304,14 +304,14 @@ inline RealType variance(const inverse_chi_squared_distribution -inline RealType mode(const inverse_chi_squared_distribution& dist) +inline BOOST_GPU_ENABLED RealType mode(const inverse_chi_squared_distribution& dist) { // mode is not defined in Mathematica. 
// See Discussion section http://en.wikipedia.org/wiki/Talk:Scaled-inverse-chi-square_distribution // for origin of the formula used below. RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); - static const char* function = "boost::math::mode(const inverse_chi_squared_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mode(const inverse_chi_squared_distribution<%1%>&)"; if(df < 0) return policies::raise_domain_error( function, @@ -334,11 +334,11 @@ inline RealType mode(const inverse_chi_squared_distribution& d // Now implemented via quantile(half) in derived accessors. template -inline RealType skewness(const inverse_chi_squared_distribution& dist) +inline BOOST_GPU_ENABLED RealType skewness(const inverse_chi_squared_distribution& dist) { BOOST_MATH_STD_USING // For ADL RealType df = dist.degrees_of_freedom(); - static const char* function = "boost::math::skewness(const inverse_chi_squared_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::skewness(const inverse_chi_squared_distribution<%1%>&)"; if(df <= 6) return policies::raise_domain_error( function, @@ -349,10 +349,10 @@ inline RealType skewness(const inverse_chi_squared_distribution -inline RealType kurtosis(const inverse_chi_squared_distribution& dist) +inline BOOST_GPU_ENABLED RealType kurtosis(const inverse_chi_squared_distribution& dist) { RealType df = dist.degrees_of_freedom(); - static const char* function = "boost::math::kurtosis(const inverse_chi_squared_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::kurtosis(const inverse_chi_squared_distribution<%1%>&)"; if(df <= 8) return policies::raise_domain_error( function, @@ -363,10 +363,10 @@ inline RealType kurtosis(const inverse_chi_squared_distribution -inline RealType kurtosis_excess(const inverse_chi_squared_distribution& dist) +inline BOOST_GPU_ENABLED RealType kurtosis_excess(const inverse_chi_squared_distribution& dist) { RealType 
df = dist.degrees_of_freedom(); - static const char* function = "boost::math::kurtosis(const inverse_chi_squared_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::kurtosis(const inverse_chi_squared_distribution<%1%>&)"; if(df <= 8) return policies::raise_domain_error( function, diff --git a/include/boost/math/distributions/inverse_gamma.hpp b/include/boost/math/distributions/inverse_gamma.hpp index fa5d357ac7..0cf14b7812 100644 --- a/include/boost/math/distributions/inverse_gamma.hpp +++ b/include/boost/math/distributions/inverse_gamma.hpp @@ -35,7 +35,7 @@ namespace detail { template -inline bool check_inverse_gamma_shape( +inline BOOST_GPU_ENABLED bool check_inverse_gamma_shape( const char* function, // inverse_gamma RealType shape, // shape aka alpha RealType* result, // to update, perhaps with NaN @@ -56,7 +56,7 @@ inline bool check_inverse_gamma_shape( } //bool check_inverse_gamma_shape template -inline bool check_inverse_gamma_x( +inline BOOST_GPU_ENABLED bool check_inverse_gamma_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -72,7 +72,7 @@ inline bool check_inverse_gamma_x( } template -inline bool check_inverse_gamma( +inline BOOST_GPU_ENABLED bool check_inverse_gamma( const char* function, // TODO swap these over, so shape is first. 
RealType scale, // scale aka beta RealType shape, // shape aka alpha @@ -91,7 +91,7 @@ class inverse_gamma_distribution typedef RealType value_type; typedef Policy policy_type; - inverse_gamma_distribution(RealType l_shape = 1, RealType l_scale = 1) + BOOST_GPU_ENABLED inverse_gamma_distribution(RealType l_shape = 1, RealType l_scale = 1) : m_shape(l_shape), m_scale(l_scale) { RealType result; @@ -100,12 +100,12 @@ class inverse_gamma_distribution l_scale, l_shape, &result, Policy()); } - RealType shape()const + BOOST_GPU_ENABLED RealType shape()const { return m_shape; } - RealType scale()const + BOOST_GPU_ENABLED RealType scale()const { return m_scale; } @@ -141,11 +141,11 @@ inline const std::pair support(const inverse_gamma_distribut } template -inline RealType pdf(const inverse_gamma_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType pdf(const inverse_gamma_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::pdf(const inverse_gamma_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const inverse_gamma_distribution<%1%>&, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -189,11 +189,11 @@ inline RealType pdf(const inverse_gamma_distribution& dist, co } // pdf template -inline RealType cdf(const inverse_gamma_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType cdf(const inverse_gamma_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const inverse_gamma_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const inverse_gamma_distribution<%1%>&, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -222,7 +222,7 @@ inline RealType quantile(const inverse_gamma_distribution& dis BOOST_MATH_STD_USING // for ADL of std functions using 
boost::math::gamma_q_inv; - static const char* function = "boost::math::quantile(const inverse_gamma_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const inverse_gamma_distribution<%1%>&, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -244,11 +244,11 @@ inline RealType quantile(const inverse_gamma_distribution& dis } template -inline RealType cdf(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)"; RealType shape = c.dist.shape(); RealType scale = c.dist.scale(); @@ -272,7 +272,7 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const inverse_gamma_distribution& dist) +inline BOOST_GPU_ENABLED RealType mean(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::mean(const inverse_gamma_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mean(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -323,11 +323,11 @@ inline RealType mean(const inverse_gamma_distribution& dist) } // mean template -inline RealType variance(const inverse_gamma_distribution& dist) +inline BOOST_GPU_ENABLED RealType variance(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::variance(const inverse_gamma_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::variance(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -349,11 +349,11 @@ inline 
RealType variance(const inverse_gamma_distribution& dis } template -inline RealType mode(const inverse_gamma_distribution& dist) +inline BOOST_GPU_ENABLED RealType mode(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::mode(const inverse_gamma_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mode(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -376,11 +376,11 @@ inline RealType mode(const inverse_gamma_distribution& dist) //} template -inline RealType skewness(const inverse_gamma_distribution& dist) +inline BOOST_GPU_ENABLED RealType skewness(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::skewness(const inverse_gamma_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::skewness(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -402,11 +402,11 @@ inline RealType skewness(const inverse_gamma_distribution& dis } template -inline RealType kurtosis_excess(const inverse_gamma_distribution& dist) +inline BOOST_GPU_ENABLED RealType kurtosis_excess(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::kurtosis_excess(const inverse_gamma_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::kurtosis_excess(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -428,9 +428,9 @@ inline RealType kurtosis_excess(const inverse_gamma_distribution -inline RealType kurtosis(const inverse_gamma_distribution& dist) +inline BOOST_GPU_ENABLED RealType kurtosis(const inverse_gamma_distribution& dist) { - static const char* function = "boost::math::kurtosis(const 
inverse_gamma_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::kurtosis(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); diff --git a/include/boost/math/distributions/inverse_gaussian.hpp b/include/boost/math/distributions/inverse_gaussian.hpp index e3aa4e0650..7f8e487873 100644 --- a/include/boost/math/distributions/inverse_gaussian.hpp +++ b/include/boost/math/distributions/inverse_gaussian.hpp @@ -74,10 +74,10 @@ class inverse_gaussian_distribution typedef RealType value_type; typedef Policy policy_type; - inverse_gaussian_distribution(RealType l_mean = 1, RealType l_scale = 1) + BOOST_GPU_ENABLED inverse_gaussian_distribution(RealType l_mean = 1, RealType l_scale = 1) : m_mean(l_mean), m_scale(l_scale) { // Default is a 1,1 inverse_gaussian distribution. - static const char* function = "boost::math::inverse_gaussian_distribution<%1%>::inverse_gaussian_distribution"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::inverse_gaussian_distribution<%1%>::inverse_gaussian_distribution"; RealType result; detail::check_scale(function, l_scale, &result, Policy()); @@ -85,22 +85,22 @@ class inverse_gaussian_distribution detail::check_x_gt0(function, l_mean, &result, Policy()); } - RealType mean()const + BOOST_GPU_ENABLED RealType mean()const { // alias for location. return m_mean; // aka mu } // Synonyms, provided to allow generic use of find_location and find_scale. - RealType location()const + BOOST_GPU_ENABLED RealType location()const { // location, aka mu. return m_mean; } - RealType scale()const + BOOST_GPU_ENABLED RealType scale()const { // scale, aka lambda. return m_scale; } - RealType shape()const + BOOST_GPU_ENABLED RealType shape()const { // shape, aka phi = lambda/mu. 
return m_scale / m_mean; } @@ -131,14 +131,14 @@ inline const std::pair support(const inverse_gaussian_distri } template -inline RealType pdf(const inverse_gaussian_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType pdf(const inverse_gaussian_distribution& dist, const RealType& x) { // Probability Density Function BOOST_MATH_STD_USING // for ADL of std functions RealType scale = dist.scale(); RealType mean = dist.mean(); RealType result = 0; - static const char* function = "boost::math::pdf(const inverse_gaussian_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const inverse_gaussian_distribution<%1%>&, %1%)"; if(false == detail::check_scale(function, scale, &result, Policy())) { return result; @@ -168,13 +168,13 @@ inline RealType pdf(const inverse_gaussian_distribution& dist, } // pdf template -inline RealType cdf(const inverse_gaussian_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType cdf(const inverse_gaussian_distribution& dist, const RealType& x) { // Cumulative Density Function. BOOST_MATH_STD_USING // for ADL of std functions. RealType scale = dist.scale(); RealType mean = dist.mean(); - static const char* function = "boost::math::cdf(const inverse_gaussian_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const inverse_gaussian_distribution<%1%>&, %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) { @@ -363,14 +363,14 @@ inline RealType quantile(const inverse_gaussian_distribution& } // quantile template -inline RealType cdf(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions. 
RealType scale = c.dist.scale(); RealType mean = c.dist.mean(); RealType x = c.param; - static const char* function = "boost::math::cdf(const complement(inverse_gaussian_distribution<%1%>&), %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const complement(inverse_gaussian_distribution<%1%>&), %1%)"; // infinite arguments not supported. //if((boost::math::isinf)(x)) //{ @@ -448,25 +448,25 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const inverse_gaussian_distribution& dist) +inline BOOST_GPU_ENABLED RealType mean(const inverse_gaussian_distribution& dist) { // aka mu return dist.mean(); } template -inline RealType scale(const inverse_gaussian_distribution& dist) +inline BOOST_GPU_ENABLED RealType scale(const inverse_gaussian_distribution& dist) { // aka lambda return dist.scale(); } template -inline RealType shape(const inverse_gaussian_distribution& dist) +inline BOOST_GPU_ENABLED RealType shape(const inverse_gaussian_distribution& dist) { // aka phi return dist.shape(); } template -inline RealType standard_deviation(const inverse_gaussian_distribution& dist) +inline BOOST_GPU_ENABLED RealType standard_deviation(const inverse_gaussian_distribution& dist) { BOOST_MATH_STD_USING RealType scale = dist.scale(); @@ -476,7 +476,7 @@ inline RealType standard_deviation(const inverse_gaussian_distribution -inline RealType mode(const inverse_gaussian_distribution& dist) +inline BOOST_GPU_ENABLED RealType mode(const inverse_gaussian_distribution& dist) { BOOST_MATH_STD_USING RealType scale = dist.scale(); @@ -487,7 +487,7 @@ inline RealType mode(const inverse_gaussian_distribution& dist } template -inline RealType skewness(const inverse_gaussian_distribution& dist) +inline BOOST_GPU_ENABLED RealType skewness(const inverse_gaussian_distribution& dist) { BOOST_MATH_STD_USING RealType scale = dist.scale(); @@ -497,7 +497,7 @@ inline RealType skewness(const inverse_gaussian_distribution& } template -inline RealType 
kurtosis(const inverse_gaussian_distribution& dist) +inline BOOST_GPU_ENABLED RealType kurtosis(const inverse_gaussian_distribution& dist) { RealType scale = dist.scale(); RealType mean = dist.mean(); @@ -506,7 +506,7 @@ inline RealType kurtosis(const inverse_gaussian_distribution& } template -inline RealType kurtosis_excess(const inverse_gaussian_distribution& dist) +inline BOOST_GPU_ENABLED RealType kurtosis_excess(const inverse_gaussian_distribution& dist) { RealType scale = dist.scale(); RealType mean = dist.mean(); diff --git a/include/boost/math/distributions/laplace.hpp b/include/boost/math/distributions/laplace.hpp index 09b24c868b..e7b723775d 100644 --- a/include/boost/math/distributions/laplace.hpp +++ b/include/boost/math/distributions/laplace.hpp @@ -42,7 +42,7 @@ class laplace_distribution // ---------------------------------- // Constructor(s) // ---------------------------------- - laplace_distribution(RealType l_location = 0, RealType l_scale = 1) + BOOST_GPU_ENABLED laplace_distribution(RealType l_location = 0, RealType l_scale = 1) : m_location(l_location), m_scale(l_scale) { RealType result; @@ -54,17 +54,17 @@ class laplace_distribution // Public functions // ---------------------------------- - RealType location() const + BOOST_GPU_ENABLED RealType location() const { return m_location; } - RealType scale() const + BOOST_GPU_ENABLED RealType scale() const { return m_scale; } - bool check_parameters(const char* function, RealType* result) const + BOOST_GPU_ENABLED bool check_parameters(const char* function, RealType* result) const { if(false == detail::check_scale(function, m_scale, result, Policy())) return false; if(false == detail::check_location(function, m_location, result, Policy())) return false; @@ -112,7 +112,7 @@ inline const std::pair support(const laplace_distribution -inline RealType pdf(const laplace_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType pdf(const laplace_distribution& dist, const RealType& x) { 
BOOST_MATH_STD_USING // for ADL of std functions @@ -144,7 +144,7 @@ inline RealType pdf(const laplace_distribution& dist, const Re } // pdf template -inline RealType cdf(const laplace_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType cdf(const laplace_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // For ADL of std functions. @@ -179,7 +179,7 @@ inline RealType cdf(const laplace_distribution& dist, const Re template -inline RealType quantile(const laplace_distribution& dist, const RealType& p) +inline BOOST_GPU_ENABLED RealType quantile(const laplace_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions. @@ -217,7 +217,7 @@ inline RealType quantile(const laplace_distribution& dist, con template -inline RealType cdf(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { // Calculate complement of cdf. BOOST_MATH_STD_USING // for ADL of std functions @@ -257,7 +257,7 @@ inline RealType cdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions. 
@@ -292,43 +292,43 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const laplace_distribution& dist) +inline BOOST_GPU_ENABLED RealType mean(const laplace_distribution& dist) { return dist.location(); } template -inline RealType standard_deviation(const laplace_distribution& dist) +inline BOOST_GPU_ENABLED RealType standard_deviation(const laplace_distribution& dist) { return constants::root_two() * dist.scale(); } template -inline RealType mode(const laplace_distribution& dist) +inline BOOST_GPU_ENABLED RealType mode(const laplace_distribution& dist) { return dist.location(); } template -inline RealType median(const laplace_distribution& dist) +inline BOOST_GPU_ENABLED RealType median(const laplace_distribution& dist) { return dist.location(); } template -inline RealType skewness(const laplace_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType skewness(const laplace_distribution& /*dist*/) { return 0; } template -inline RealType kurtosis(const laplace_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType kurtosis(const laplace_distribution& /*dist*/) { return 6; } template -inline RealType kurtosis_excess(const laplace_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType kurtosis_excess(const laplace_distribution& /*dist*/) { return 3; } diff --git a/include/boost/math/distributions/logistic.hpp b/include/boost/math/distributions/logistic.hpp index b3d16b7197..fbe02d8961 100644 --- a/include/boost/math/distributions/logistic.hpp +++ b/include/boost/math/distributions/logistic.hpp @@ -24,22 +24,22 @@ namespace boost { namespace math { typedef RealType value_type; typedef Policy policy_type; - logistic_distribution(RealType l_location=0, RealType l_scale=1) // Constructor. + BOOST_GPU_ENABLED logistic_distribution(RealType l_location=0, RealType l_scale=1) // Constructor. 
: m_location(l_location), m_scale(l_scale) { - static const char* function = "boost::math::logistic_distribution<%1%>::logistic_distribution"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::logistic_distribution<%1%>::logistic_distribution"; RealType result; detail::check_scale(function, l_scale, &result, Policy()); detail::check_location(function, l_location, &result, Policy()); } // Accessor functions. - RealType scale()const + BOOST_GPU_ENABLED RealType scale()const { return m_scale; } - RealType location()const + BOOST_GPU_ENABLED RealType location()const { return m_location; } @@ -70,9 +70,9 @@ namespace boost { namespace math { } template - inline RealType pdf(const logistic_distribution& dist, const RealType& x) + inline BOOST_GPU_ENABLED RealType pdf(const logistic_distribution& dist, const RealType& x) { - static const char* function = "boost::math::pdf(const logistic_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const logistic_distribution<%1%>&, %1%)"; RealType scale = dist.scale(); RealType location = dist.location(); RealType result = 0; @@ -107,12 +107,12 @@ namespace boost { namespace math { } template - inline RealType cdf(const logistic_distribution& dist, const RealType& x) + inline BOOST_GPU_ENABLED RealType cdf(const logistic_distribution& dist, const RealType& x) { RealType scale = dist.scale(); RealType location = dist.location(); RealType result = 0; // of checks. 
- static const char* function = "boost::math::cdf(const logistic_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const logistic_distribution<%1%>&, %1%)"; if(false == detail::check_scale(function, scale, &result, Policy())) { return result; @@ -142,13 +142,13 @@ namespace boost { namespace math { } template - inline RealType quantile(const logistic_distribution& dist, const RealType& p) + inline BOOST_GPU_ENABLED RealType quantile(const logistic_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING RealType location = dist.location(); RealType scale = dist.scale(); - static const char* function = "boost::math::quantile(const logistic_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const logistic_distribution<%1%>&, %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) @@ -178,13 +178,13 @@ namespace boost { namespace math { } // RealType quantile(const logistic_distribution& dist, const RealType& p) template - inline RealType cdf(const complemented2_type, RealType>& c) + inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING RealType location = c.dist.location(); RealType scale = c.dist.scale(); RealType x = c.param; - static const char* function = "boost::math::cdf(const complement(logistic_distribution<%1%>&), %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const complement(logistic_distribution<%1%>&), %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) @@ -213,12 +213,12 @@ namespace boost { namespace math { } template - inline RealType quantile(const complemented2_type, RealType>& c) + inline BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING RealType scale = c.dist.scale(); RealType location = c.dist.location(); - static const char* function = 
"boost::math::quantile(const complement(logistic_distribution<%1%>&), %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const complement(logistic_distribution<%1%>&), %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) return result; @@ -249,13 +249,13 @@ namespace boost { namespace math { } template - inline RealType mean(const logistic_distribution& dist) + inline BOOST_GPU_ENABLED RealType mean(const logistic_distribution& dist) { return dist.location(); } // RealType mean(const logistic_distribution& dist) template - inline RealType variance(const logistic_distribution& dist) + inline BOOST_GPU_ENABLED RealType variance(const logistic_distribution& dist) { BOOST_MATH_STD_USING RealType scale = dist.scale(); @@ -263,30 +263,30 @@ namespace boost { namespace math { } // RealType variance(const logistic_distribution& dist) template - inline RealType mode(const logistic_distribution& dist) + inline BOOST_GPU_ENABLED RealType mode(const logistic_distribution& dist) { return dist.location(); } template - inline RealType median(const logistic_distribution& dist) + inline BOOST_GPU_ENABLED RealType median(const logistic_distribution& dist) { return dist.location(); } template - inline RealType skewness(const logistic_distribution& /*dist*/) + inline BOOST_GPU_ENABLED RealType skewness(const logistic_distribution& /*dist*/) { return 0; } // RealType skewness(const logistic_distribution& dist) template - inline RealType kurtosis_excess(const logistic_distribution& /*dist*/) + inline BOOST_GPU_ENABLED RealType kurtosis_excess(const logistic_distribution& /*dist*/) { return static_cast(6)/5; } // RealType kurtosis_excess(const logistic_distribution& dist) template - inline RealType kurtosis(const logistic_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis(const logistic_distribution& dist) { return kurtosis_excess(dist) + 3; } // RealType kurtosis_excess(const logistic_distribution& dist) 
diff --git a/include/boost/math/distributions/lognormal.hpp b/include/boost/math/distributions/lognormal.hpp index 4e6c0610d4..b8a3ce525b 100644 --- a/include/boost/math/distributions/lognormal.hpp +++ b/include/boost/math/distributions/lognormal.hpp @@ -23,7 +23,7 @@ namespace detail { template - inline bool check_lognormal_x( + inline BOOST_GPU_ENABLED bool check_lognormal_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -48,7 +48,7 @@ class lognormal_distribution typedef RealType value_type; typedef Policy policy_type; - lognormal_distribution(RealType l_location = 0, RealType l_scale = 1) + BOOST_GPU_ENABLED lognormal_distribution(RealType l_location = 0, RealType l_scale = 1) : m_location(l_location), m_scale(l_scale) { RealType result; @@ -56,12 +56,12 @@ class lognormal_distribution detail::check_location("boost::math::lognormal_distribution<%1%>::lognormal_distribution", l_location, &result, Policy()); } - RealType location()const + BOOST_GPU_ENABLED RealType location()const { return m_location; } - RealType scale()const + BOOST_GPU_ENABLED RealType scale()const { return m_scale; } @@ -91,14 +91,14 @@ inline const std::pair support(const lognormal_distribution< } template -RealType pdf(const lognormal_distribution& dist, const RealType& x) +BOOST_GPU_ENABLED RealType pdf(const lognormal_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions RealType mu = dist.location(); RealType sigma = dist.scale(); - static const char* function = "boost::math::pdf(const lognormal_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, sigma, &result, Policy())) @@ -122,11 +122,11 @@ RealType pdf(const lognormal_distribution& dist, const RealTyp } template -inline RealType cdf(const lognormal_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType 
cdf(const lognormal_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const lognormal_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, dist.scale(), &result, Policy())) @@ -144,11 +144,11 @@ inline RealType cdf(const lognormal_distribution& dist, const } template -inline RealType quantile(const lognormal_distribution& dist, const RealType& p) +inline BOOST_GPU_ENABLED RealType quantile(const lognormal_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const lognormal_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, dist.scale(), &result, Policy())) @@ -168,11 +168,11 @@ inline RealType quantile(const lognormal_distribution& dist, c } template -inline RealType cdf(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const lognormal_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, c.dist.scale(), &result, Policy())) @@ -190,11 +190,11 @@ inline RealType cdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const lognormal_distribution<%1%>&, %1%)"; + 
BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, c.dist.scale(), &result, Policy())) @@ -214,7 +214,7 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const lognormal_distribution& dist) +inline BOOST_GPU_ENABLED RealType mean(const lognormal_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions @@ -231,7 +231,7 @@ inline RealType mean(const lognormal_distribution& dist) } template -inline RealType variance(const lognormal_distribution& dist) +inline BOOST_GPU_ENABLED RealType variance(const lognormal_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions @@ -248,7 +248,7 @@ inline RealType variance(const lognormal_distribution& dist) } template -inline RealType mode(const lognormal_distribution& dist) +inline BOOST_GPU_ENABLED RealType mode(const lognormal_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions @@ -265,7 +265,7 @@ inline RealType mode(const lognormal_distribution& dist) } template -inline RealType median(const lognormal_distribution& dist) +inline BOOST_GPU_ENABLED RealType median(const lognormal_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions RealType mu = dist.location(); @@ -273,7 +273,7 @@ inline RealType median(const lognormal_distribution& dist) } template -inline RealType skewness(const lognormal_distribution& dist) +inline BOOST_GPU_ENABLED RealType skewness(const lognormal_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions @@ -293,7 +293,7 @@ inline RealType skewness(const lognormal_distribution& dist) } template -inline RealType kurtosis(const lognormal_distribution& dist) +inline BOOST_GPU_ENABLED RealType kurtosis(const lognormal_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions @@ -311,7 +311,7 @@ inline RealType kurtosis(const lognormal_distribution& dist) } template -inline 
RealType kurtosis_excess(const lognormal_distribution& dist) +inline BOOST_GPU_ENABLED RealType kurtosis_excess(const lognormal_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions diff --git a/include/boost/math/distributions/normal.hpp b/include/boost/math/distributions/normal.hpp index 32cf66e3ef..5a7a2e0673 100644 --- a/include/boost/math/distributions/normal.hpp +++ b/include/boost/math/distributions/normal.hpp @@ -31,32 +31,32 @@ class normal_distribution typedef RealType value_type; typedef Policy policy_type; - normal_distribution(RealType l_mean = 0, RealType sd = 1) + BOOST_GPU_ENABLED normal_distribution(RealType l_mean = 0, RealType sd = 1) : m_mean(l_mean), m_sd(sd) { // Default is a 'standard' normal distribution N01. - static const char* function = "boost::math::normal_distribution<%1%>::normal_distribution"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::normal_distribution<%1%>::normal_distribution"; RealType result; detail::check_scale(function, sd, &result, Policy()); detail::check_location(function, l_mean, &result, Policy()); } - RealType mean()const + BOOST_GPU_ENABLED RealType mean()const { // alias for location. return m_mean; } - RealType standard_deviation()const + BOOST_GPU_ENABLED RealType standard_deviation()const { // alias for scale. return m_sd; } // Synonyms, provided to allow generic use of find_location and find_scale. - RealType location()const + BOOST_GPU_ENABLED RealType location()const { // location. return m_mean; } - RealType scale()const + BOOST_GPU_ENABLED RealType scale()const { // scale. 
return m_sd; } @@ -109,14 +109,14 @@ inline const std::pair support(const normal_distribution -inline RealType pdf(const normal_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType pdf(const normal_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions RealType sd = dist.standard_deviation(); RealType mean = dist.mean(); - static const char* function = "boost::math::pdf(const normal_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const normal_distribution<%1%>&, %1%)"; RealType result = 0; if(false == detail::check_scale(function, sd, &result, Policy())) @@ -152,13 +152,13 @@ inline RealType pdf(const normal_distribution& dist, const Rea } // pdf template -inline RealType cdf(const normal_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType cdf(const normal_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions RealType sd = dist.standard_deviation(); RealType mean = dist.mean(); - static const char* function = "boost::math::cdf(const normal_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const normal_distribution<%1%>&, %1%)"; RealType result = 0; if(false == detail::check_scale(function, sd, &result, Policy())) { @@ -192,13 +192,13 @@ inline RealType cdf(const normal_distribution& dist, const Rea } // cdf template -inline RealType quantile(const normal_distribution& dist, const RealType& p) +inline BOOST_GPU_ENABLED RealType quantile(const normal_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions RealType sd = dist.standard_deviation(); RealType mean = dist.mean(); - static const char* function = "boost::math::quantile(const normal_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const normal_distribution<%1%>&, %1%)"; RealType result = 0; if(false == detail::check_scale(function, 
sd, &result, Policy())) @@ -216,14 +216,14 @@ inline RealType quantile(const normal_distribution& dist, cons } // quantile template -inline RealType cdf(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions RealType sd = c.dist.standard_deviation(); RealType mean = c.dist.mean(); RealType x = c.param; - static const char* function = "boost::math::cdf(const complement(normal_distribution<%1%>&), %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const complement(normal_distribution<%1%>&), %1%)"; RealType result = 0; if(false == detail::check_scale(function, sd, &result, Policy())) @@ -253,13 +253,13 @@ inline RealType cdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions RealType sd = c.dist.standard_deviation(); RealType mean = c.dist.mean(); - static const char* function = "boost::math::quantile(const complement(normal_distribution<%1%>&), %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const complement(normal_distribution<%1%>&), %1%)"; RealType result = 0; if(false == detail::check_scale(function, sd, &result, Policy())) return result; @@ -275,43 +275,43 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const normal_distribution& dist) +inline BOOST_GPU_ENABLED RealType mean(const normal_distribution& dist) { return dist.mean(); } template -inline RealType standard_deviation(const normal_distribution& dist) +inline BOOST_GPU_ENABLED RealType standard_deviation(const normal_distribution& dist) { return dist.standard_deviation(); } template -inline RealType mode(const normal_distribution& dist) +inline BOOST_GPU_ENABLED RealType mode(const normal_distribution& dist) { return dist.mean(); } template 
-inline RealType median(const normal_distribution& dist) +inline BOOST_GPU_ENABLED RealType median(const normal_distribution& dist) { return dist.mean(); } template -inline RealType skewness(const normal_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType skewness(const normal_distribution& /*dist*/) { return 0; } template -inline RealType kurtosis(const normal_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType kurtosis(const normal_distribution& /*dist*/) { return 3; } template -inline RealType kurtosis_excess(const normal_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType kurtosis_excess(const normal_distribution& /*dist*/) { return 0; } diff --git a/include/boost/math/distributions/pareto.hpp b/include/boost/math/distributions/pareto.hpp index 1c6cf350f8..0e2b1fe507 100644 --- a/include/boost/math/distributions/pareto.hpp +++ b/include/boost/math/distributions/pareto.hpp @@ -30,7 +30,7 @@ namespace boost namespace detail { // Parameter checking. template - inline bool check_pareto_scale( + inline BOOST_GPU_ENABLED bool check_pareto_scale( const char* function, RealType scale, RealType* result, const Policy& pol) @@ -59,7 +59,7 @@ namespace boost } // bool check_pareto_scale template - inline bool check_pareto_shape( + inline BOOST_GPU_ENABLED bool check_pareto_shape( const char* function, RealType shape, RealType* result, const Policy& pol) @@ -88,7 +88,7 @@ namespace boost } // bool check_pareto_shape( template - inline bool check_pareto_x( + inline BOOST_GPU_ENABLED bool check_pareto_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -117,7 +117,7 @@ namespace boost } // bool check_pareto_x template - inline bool check_pareto( // distribution parameters. + inline BOOST_GPU_ENABLED bool check_pareto( // distribution parameters. 
const char* function, RealType scale, RealType shape, @@ -136,19 +136,19 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - pareto_distribution(RealType l_scale = 1, RealType l_shape = 1) + BOOST_GPU_ENABLED pareto_distribution(RealType l_scale = 1, RealType l_shape = 1) : m_scale(l_scale), m_shape(l_shape) { // Constructor. RealType result = 0; detail::check_pareto("boost::math::pareto_distribution<%1%>::pareto_distribution", l_scale, l_shape, &result, Policy()); } - RealType scale()const + BOOST_GPU_ENABLED RealType scale()const { // AKA Xm and Wolfram b and beta return m_scale; } - RealType shape()const + BOOST_GPU_ENABLED RealType shape()const { // AKA k and Wolfram a and alpha return m_shape; } @@ -176,10 +176,10 @@ namespace boost } // support template - inline RealType pdf(const pareto_distribution& dist, const RealType& x) + inline BOOST_GPU_ENABLED RealType pdf(const pareto_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; RealType scale = dist.scale(); RealType shape = dist.shape(); RealType result = 0; @@ -195,10 +195,10 @@ namespace boost } // pdf template - inline RealType cdf(const pareto_distribution& dist, const RealType& x) + inline BOOST_GPU_ENABLED RealType cdf(const pareto_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std function pow. 
- static const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; RealType scale = dist.scale(); RealType shape = dist.shape(); RealType result = 0; @@ -218,10 +218,10 @@ namespace boost } // cdf template - inline RealType quantile(const pareto_distribution& dist, const RealType& p) + inline BOOST_GPU_ENABLED RealType quantile(const pareto_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; RealType result = 0; RealType scale = dist.scale(); RealType shape = dist.shape(); @@ -245,10 +245,10 @@ namespace boost } // quantile template - inline RealType cdf(const complemented2_type, RealType>& c) + inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; RealType result = 0; RealType x = c.param; RealType scale = c.dist.scale(); @@ -267,10 +267,10 @@ namespace boost } // cdf complement template - inline RealType quantile(const complemented2_type, RealType>& c) + inline BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std function pow. 
- static const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; RealType result = 0; RealType q = c.param; RealType scale = c.dist.scale(); @@ -294,10 +294,10 @@ namespace boost } // quantile complement template - inline RealType mean(const pareto_distribution& dist) + inline BOOST_GPU_ENABLED RealType mean(const pareto_distribution& dist) { RealType result = 0; - static const char* function = "boost::math::mean(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mean(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), dist.shape(), &result, Policy())) { return result; @@ -314,16 +314,16 @@ namespace boost } // mean template - inline RealType mode(const pareto_distribution& dist) + inline BOOST_GPU_ENABLED RealType mode(const pareto_distribution& dist) { return dist.scale(); } // mode template - inline RealType median(const pareto_distribution& dist) + inline BOOST_GPU_ENABLED RealType median(const pareto_distribution& dist) { RealType result = 0; - static const char* function = "boost::math::median(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::median(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), dist.shape(), &result, Policy())) { return result; @@ -333,12 +333,12 @@ namespace boost } // median template - inline RealType variance(const pareto_distribution& dist) + inline BOOST_GPU_ENABLED RealType variance(const pareto_distribution& dist) { RealType result = 0; RealType scale = dist.scale(); RealType shape = dist.shape(); - static const char* function = "boost::math::variance(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::variance(const pareto_distribution<%1%>&, %1%)"; 
if(false == detail::check_pareto(function, scale, shape, &result, Policy())) { return result; @@ -358,12 +358,12 @@ namespace boost } // variance template - inline RealType skewness(const pareto_distribution& dist) + inline BOOST_GPU_ENABLED RealType skewness(const pareto_distribution& dist) { BOOST_MATH_STD_USING RealType result = 0; RealType shape = dist.shape(); - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy())) { return result; @@ -384,11 +384,11 @@ namespace boost } // skewness template - inline RealType kurtosis(const pareto_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis(const pareto_distribution& dist) { RealType result = 0; RealType shape = dist.shape(); - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy())) { return result; @@ -408,11 +408,11 @@ namespace boost } // kurtosis template - inline RealType kurtosis_excess(const pareto_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis_excess(const pareto_distribution& dist) { RealType result = 0; RealType shape = dist.shape(); - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy())) { return result; diff --git a/include/boost/math/distributions/poisson.hpp b/include/boost/math/distributions/poisson.hpp index e4665bff69..827d455638 100644 --- a/include/boost/math/distributions/poisson.hpp +++ 
b/include/boost/math/distributions/poisson.hpp @@ -59,7 +59,7 @@ namespace boost // checks are always performed, even if exceptions are not enabled. template - inline bool check_mean(const char* function, const RealType& mean, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_mean(const char* function, const RealType& mean, RealType* result, const Policy& pol) { if(!(boost::math::isfinite)(mean) || (mean < 0)) { @@ -72,7 +72,7 @@ namespace boost } // bool check_mean template - inline bool check_mean_NZ(const char* function, const RealType& mean, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_mean_NZ(const char* function, const RealType& mean, RealType* result, const Policy& pol) { // mean == 0 is considered an error. if( !(boost::math::isfinite)(mean) || (mean <= 0)) { @@ -85,13 +85,13 @@ namespace boost } // bool check_mean_NZ template - inline bool check_dist(const char* function, const RealType& mean, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_dist(const char* function, const RealType& mean, RealType* result, const Policy& pol) { // Only one check, so this is redundant really but should be optimized away. 
return check_mean_NZ(function, mean, result, pol); } // bool check_dist template - inline bool check_k(const char* function, const RealType& k, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_k(const char* function, const RealType& k, RealType* result, const Policy& pol) { if((k < 0) || !(boost::math::isfinite)(k)) { @@ -104,7 +104,7 @@ namespace boost } // bool check_k template - inline bool check_dist_and_k(const char* function, RealType mean, RealType k, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_dist_and_k(const char* function, RealType mean, RealType k, RealType* result, const Policy& pol) { if((check_dist(function, mean, result, pol) == false) || (check_k(function, k, result, pol) == false)) @@ -115,7 +115,7 @@ namespace boost } // bool check_dist_and_k template - inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol) { // Check 0 <= p <= 1 if(!(boost::math::isfinite)(p) || (p < 0) || (p > 1)) { @@ -128,7 +128,7 @@ namespace boost } // bool check_prob template - inline bool check_dist_and_prob(const char* function, RealType mean, RealType p, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_dist_and_prob(const char* function, RealType mean, RealType p, RealType* result, const Policy& pol) { if((check_dist(function, mean, result, pol) == false) || (check_prob(function, p, result, pol) == false)) @@ -147,7 +147,7 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - poisson_distribution(RealType l_mean = 1) : m_l(l_mean) // mean (lambda). + BOOST_GPU_ENABLED poisson_distribution(RealType l_mean = 1) : m_l(l_mean) // mean (lambda). { // Expected mean number of events that occur during the given interval. 
RealType r; poisson_detail::check_dist( @@ -156,7 +156,7 @@ namespace boost &r, Policy()); } // poisson_distribution constructor. - RealType mean() const + BOOST_GPU_ENABLED RealType mean() const { // Private data getter function. return m_l; } @@ -185,13 +185,13 @@ namespace boost } template - inline RealType mean(const poisson_distribution& dist) + inline BOOST_GPU_ENABLED RealType mean(const poisson_distribution& dist) { // Mean of poisson distribution = lambda. return dist.mean(); } // mean template - inline RealType mode(const poisson_distribution& dist) + inline BOOST_GPU_ENABLED RealType mode(const poisson_distribution& dist) { // mode. BOOST_MATH_STD_USING // ADL of std functions. return floor(dist.mean()); @@ -208,7 +208,7 @@ namespace boost // Now implemented via quantile(half) in derived accessors. template - inline RealType variance(const poisson_distribution& dist) + inline BOOST_GPU_ENABLED RealType variance(const poisson_distribution& dist) { // variance. return dist.mean(); } @@ -217,14 +217,14 @@ namespace boost // standard_deviation provided by derived accessors. template - inline RealType skewness(const poisson_distribution& dist) + inline BOOST_GPU_ENABLED RealType skewness(const poisson_distribution& dist) { // skewness = sqrt(l). BOOST_MATH_STD_USING // ADL of std functions. return 1 / sqrt(dist.mean()); } template - inline RealType kurtosis_excess(const poisson_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis_excess(const poisson_distribution& dist) { // skewness = sqrt(l). return 1 / dist.mean(); // kurtosis_excess 1/mean from Wiki & MathWorld eq 31. 
// http://mathworld.wolfram.com/Kurtosis.html explains that the kurtosis excess @@ -233,7 +233,7 @@ namespace boost } // RealType kurtosis_excess template - inline RealType kurtosis(const poisson_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis(const poisson_distribution& dist) { // kurtosis is 4th moment about the mean = u4 / sd ^ 4 // http://en.wikipedia.org/wiki/Curtosis // kurtosis can range from -2 (flat top) to +infinity (sharp peak & heavy tails). @@ -245,7 +245,7 @@ namespace boost } // RealType kurtosis template - RealType pdf(const poisson_distribution& dist, const RealType& k) + BOOST_GPU_ENABLED RealType pdf(const poisson_distribution& dist, const RealType& k) { // Probability Density/Mass Function. // Probability that there are EXACTLY k occurrences (or arrivals). BOOST_FPU_EXCEPTION_GUARD @@ -277,7 +277,7 @@ namespace boost } // pdf template - RealType cdf(const poisson_distribution& dist, const RealType& k) + BOOST_GPU_ENABLED RealType cdf(const poisson_distribution& dist, const RealType& k) { // Cumulative Distribution Function Poisson. // The random variate k is the number of occurrences(or arrivals) // k argument may be integral, signed, or unsigned, or floating point. @@ -328,7 +328,7 @@ namespace boost } // binomial cdf template - RealType cdf(const complemented2_type, RealType>& c) + BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function Poisson // The random variate k is the number of events, occurrences or arrivals. // k argument may be integral, signed, or unsigned, or floating point. 
diff --git a/include/boost/math/distributions/rayleigh.hpp b/include/boost/math/distributions/rayleigh.hpp index 744733a9fa..4de2016431 100644 --- a/include/boost/math/distributions/rayleigh.hpp +++ b/include/boost/math/distributions/rayleigh.hpp @@ -26,7 +26,7 @@ namespace boost{ namespace math{ namespace detail { // Error checks: template - inline bool verify_sigma(const char* function, RealType sigma, RealType* presult, const Policy& pol) + inline BOOST_GPU_ENABLED bool verify_sigma(const char* function, RealType sigma, RealType* presult, const Policy& pol) { if((sigma <= 0) || (!(boost::math::isfinite)(sigma))) { @@ -39,7 +39,7 @@ namespace detail } // bool verify_sigma template - inline bool verify_rayleigh_x(const char* function, RealType x, RealType* presult, const Policy& pol) + inline BOOST_GPU_ENABLED bool verify_rayleigh_x(const char* function, RealType x, RealType* presult, const Policy& pol) { if((x < 0) || (boost::math::isnan)(x)) { @@ -59,14 +59,14 @@ class rayleigh_distribution typedef RealType value_type; typedef Policy policy_type; - rayleigh_distribution(RealType l_sigma = 1) + BOOST_GPU_ENABLED rayleigh_distribution(RealType l_sigma = 1) : m_sigma(l_sigma) { RealType err; detail::verify_sigma("boost::math::rayleigh_distribution<%1%>::rayleigh_distribution", l_sigma, &err, Policy()); } // rayleigh_distribution - RealType sigma()const + BOOST_GPU_ENABLED RealType sigma()const { // Accessor. return m_sigma; } @@ -93,13 +93,13 @@ inline const std::pair support(const rayleigh_distribution -inline RealType pdf(const rayleigh_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType pdf(const rayleigh_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std function exp. 
RealType sigma = dist.sigma(); RealType result = 0; - static const char* function = "boost::math::pdf(const rayleigh_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const rayleigh_distribution<%1%>&, %1%)"; if(false == detail::verify_sigma(function, sigma, &result, Policy())) { return result; @@ -118,13 +118,13 @@ inline RealType pdf(const rayleigh_distribution& dist, const R } // pdf template -inline RealType cdf(const rayleigh_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType cdf(const rayleigh_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions RealType result = 0; RealType sigma = dist.sigma(); - static const char* function = "boost::math::cdf(const rayleigh_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const rayleigh_distribution<%1%>&, %1%)"; if(false == detail::verify_sigma(function, sigma, &result, Policy())) { return result; @@ -138,13 +138,13 @@ inline RealType cdf(const rayleigh_distribution& dist, const R } // cdf template -inline RealType quantile(const rayleigh_distribution& dist, const RealType& p) +inline BOOST_GPU_ENABLED RealType quantile(const rayleigh_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions RealType result = 0; RealType sigma = dist.sigma(); - static const char* function = "boost::math::quantile(const rayleigh_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const rayleigh_distribution<%1%>&, %1%)"; if(false == detail::verify_sigma(function, sigma, &result, Policy())) return result; if(false == detail::check_probability(function, p, &result, Policy())) @@ -163,13 +163,13 @@ inline RealType quantile(const rayleigh_distribution& dist, co } // quantile template -inline RealType cdf(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { 
BOOST_MATH_STD_USING // for ADL of std functions RealType result = 0; RealType sigma = c.dist.sigma(); - static const char* function = "boost::math::cdf(const rayleigh_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const rayleigh_distribution<%1%>&, %1%)"; if(false == detail::verify_sigma(function, sigma, &result, Policy())) { return result; @@ -188,13 +188,13 @@ inline RealType cdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions, log & sqrt. RealType result = 0; RealType sigma = c.dist.sigma(); - static const char* function = "boost::math::quantile(const rayleigh_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const rayleigh_distribution<%1%>&, %1%)"; if(false == detail::verify_sigma(function, sigma, &result, Policy())) { return result; @@ -217,11 +217,11 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const rayleigh_distribution& dist) +inline BOOST_GPU_ENABLED RealType mean(const rayleigh_distribution& dist) { RealType result = 0; RealType sigma = dist.sigma(); - static const char* function = "boost::math::mean(const rayleigh_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mean(const rayleigh_distribution<%1%>&, %1%)"; if(false == detail::verify_sigma(function, sigma, &result, Policy())) { return result; @@ -231,11 +231,11 @@ inline RealType mean(const rayleigh_distribution& dist) } // mean template -inline RealType variance(const rayleigh_distribution& dist) +inline BOOST_GPU_ENABLED RealType variance(const rayleigh_distribution& dist) { RealType result = 0; RealType sigma = dist.sigma(); - static const char* function = "boost::math::variance(const rayleigh_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function 
= "boost::math::variance(const rayleigh_distribution<%1%>&, %1%)"; if(false == detail::verify_sigma(function, sigma, &result, Policy())) { return result; @@ -245,20 +245,20 @@ inline RealType variance(const rayleigh_distribution& dist) } // variance template -inline RealType mode(const rayleigh_distribution& dist) +inline BOOST_GPU_ENABLED RealType mode(const rayleigh_distribution& dist) { return dist.sigma(); } template -inline RealType median(const rayleigh_distribution& dist) +inline BOOST_GPU_ENABLED RealType median(const rayleigh_distribution& dist) { using boost::math::constants::root_ln_four; return root_ln_four() * dist.sigma(); } template -inline RealType skewness(const rayleigh_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType skewness(const rayleigh_distribution& /*dist*/) { // using namespace boost::math::constants; return static_cast(0.63111065781893713819189935154422777984404221106391L); @@ -267,7 +267,7 @@ inline RealType skewness(const rayleigh_distribution& /*dist*/ } template -inline RealType kurtosis(const rayleigh_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType kurtosis(const rayleigh_distribution& /*dist*/) { // using namespace boost::math::constants; return static_cast(3.2450893006876380628486604106197544154170667057995L); @@ -277,7 +277,7 @@ inline RealType kurtosis(const rayleigh_distribution& /*dist*/ } template -inline RealType kurtosis_excess(const rayleigh_distribution& /*dist*/) +inline BOOST_GPU_ENABLED RealType kurtosis_excess(const rayleigh_distribution& /*dist*/) { //using namespace boost::math::constants; // Computed using NTL at 150 bit, about 50 decimal digits. 
diff --git a/include/boost/math/distributions/skew_normal.hpp b/include/boost/math/distributions/skew_normal.hpp index f348347ede..1a2dcf37a0 100644 --- a/include/boost/math/distributions/skew_normal.hpp +++ b/include/boost/math/distributions/skew_normal.hpp @@ -392,7 +392,7 @@ namespace boost{ namespace math{ const diff_type d = std::distance(shapes, result_ptr); - BOOST_ASSERT(d > static_cast(0)); + BOOST_MATH_ASSERT(d > static_cast(0)); // refine if(d < static_cast(21)) // shape smaller 100 @@ -531,7 +531,7 @@ namespace boost{ namespace math{ const diff_type d = std::distance(shapes, result_ptr); - BOOST_ASSERT(d > static_cast(0)); + BOOST_MATH_ASSERT(d > static_cast(0)); // TODO: make the search bounds smarter, depending on the shape parameter RealType search_min = 0; // below zero was caught above diff --git a/include/boost/math/distributions/triangular.hpp b/include/boost/math/distributions/triangular.hpp index 1e49a38faf..92d8abef19 100644 --- a/include/boost/math/distributions/triangular.hpp +++ b/include/boost/math/distributions/triangular.hpp @@ -29,7 +29,7 @@ namespace boost{ namespace math namespace detail { template - inline bool check_triangular_lower( + inline BOOST_GPU_ENABLED bool check_triangular_lower( const char* function, RealType lower, RealType* result, const Policy& pol) @@ -48,7 +48,7 @@ namespace boost{ namespace math } // bool check_triangular_lower( template - inline bool check_triangular_mode( + inline BOOST_GPU_ENABLED bool check_triangular_mode( const char* function, RealType mode, RealType* result, const Policy& pol) @@ -67,7 +67,7 @@ namespace boost{ namespace math } // bool check_triangular_mode( template - inline bool check_triangular_upper( + inline BOOST_GPU_ENABLED bool check_triangular_upper( const char* function, RealType upper, RealType* result, const Policy& pol) @@ -86,7 +86,7 @@ namespace boost{ namespace math } // bool check_triangular_upper( template - inline bool check_triangular_x( + inline BOOST_GPU_ENABLED bool 
check_triangular_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -105,7 +105,7 @@ namespace boost{ namespace math } // bool check_triangular_x template - inline bool check_triangular( + inline BOOST_GPU_ENABLED bool check_triangular( const char* function, RealType lower, RealType mode, @@ -153,7 +153,7 @@ namespace boost{ namespace math typedef RealType value_type; typedef Policy policy_type; - triangular_distribution(RealType l_lower = -1, RealType l_mode = 0, RealType l_upper = 1) + BOOST_GPU_ENABLED triangular_distribution(RealType l_lower = -1, RealType l_mode = 0, RealType l_upper = 1) : m_lower(l_lower), m_mode(l_mode), m_upper(l_upper) // Constructor. { // Evans says 'standard triangular' is lower 0, mode 1/2, upper 1, // has median sqrt(c/2) for c <=1/2 and 1 - sqrt(1-c)/2 for c >= 1/2 @@ -163,15 +163,15 @@ namespace boost{ namespace math detail::check_triangular("boost::math::triangular_distribution<%1%>::triangular_distribution",l_lower, l_mode, l_upper, &result, Policy()); } // Accessor functions. 
- RealType lower()const + BOOST_GPU_ENABLED RealType lower()const { return m_lower; } - RealType mode()const + BOOST_GPU_ENABLED RealType mode()const { return m_mode; } - RealType upper()const + BOOST_GPU_ENABLED RealType upper()const { return m_upper; } @@ -199,9 +199,9 @@ namespace boost{ namespace math } template - RealType pdf(const triangular_distribution& dist, const RealType& x) + BOOST_GPU_ENABLED RealType pdf(const triangular_distribution& dist, const RealType& x) { - static const char* function = "boost::math::pdf(const triangular_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const triangular_distribution<%1%>&, %1%)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -237,9 +237,9 @@ namespace boost{ namespace math } // RealType pdf(const triangular_distribution& dist, const RealType& x) template - inline RealType cdf(const triangular_distribution& dist, const RealType& x) + inline BOOST_GPU_ENABLED RealType cdf(const triangular_distribution& dist, const RealType& x) { - static const char* function = "boost::math::cdf(const triangular_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const triangular_distribution<%1%>&, %1%)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -272,10 +272,10 @@ namespace boost{ namespace math } // RealType cdf(const triangular_distribution& dist, const RealType& x) template - RealType quantile(const triangular_distribution& dist, const RealType& p) + BOOST_GPU_ENABLED RealType quantile(const triangular_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions (sqrt). 
- static const char* function = "boost::math::quantile(const triangular_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const triangular_distribution<%1%>&, %1%)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -315,9 +315,9 @@ namespace boost{ namespace math } // RealType quantile(const triangular_distribution& dist, const RealType& q) template - RealType cdf(const complemented2_type, RealType>& c) + BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { - static const char* function = "boost::math::cdf(const triangular_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const triangular_distribution<%1%>&, %1%)"; RealType lower = c.dist.lower(); RealType mode = c.dist.mode(); RealType upper = c.dist.upper(); @@ -350,10 +350,10 @@ namespace boost{ namespace math } // RealType cdf(const complemented2_type, RealType>& c) template - RealType quantile(const complemented2_type, RealType>& c) + BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // Aid ADL for sqrt. 
- static const char* function = "boost::math::quantile(const triangular_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const triangular_distribution<%1%>&, %1%)"; RealType l = c.dist.lower(); RealType m = c.dist.mode(); RealType u = c.dist.upper(); @@ -399,9 +399,9 @@ namespace boost{ namespace math } // RealType quantile(const complemented2_type, RealType>& c) template - inline RealType mean(const triangular_distribution& dist) + inline BOOST_GPU_ENABLED RealType mean(const triangular_distribution& dist) { - static const char* function = "boost::math::mean(const triangular_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mean(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -415,9 +415,9 @@ namespace boost{ namespace math template - inline RealType variance(const triangular_distribution& dist) + inline BOOST_GPU_ENABLED RealType variance(const triangular_distribution& dist) { - static const char* function = "boost::math::mean(const triangular_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mean(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -430,9 +430,9 @@ namespace boost{ namespace math } // RealType variance(const triangular_distribution& dist) template - inline RealType mode(const triangular_distribution& dist) + inline BOOST_GPU_ENABLED RealType mode(const triangular_distribution& dist) { - static const char* function = "boost::math::mode(const triangular_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mode(const triangular_distribution<%1%>&)"; RealType mode = dist.mode(); RealType result = 0; // of checks. 
if(false == detail::check_triangular_mode(function, mode, &result, Policy())) @@ -443,10 +443,10 @@ namespace boost{ namespace math } // RealType mode template - inline RealType median(const triangular_distribution& dist) + inline BOOST_GPU_ENABLED RealType median(const triangular_distribution& dist) { BOOST_MATH_STD_USING // ADL of std functions. - static const char* function = "boost::math::median(const triangular_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::median(const triangular_distribution<%1%>&)"; RealType mode = dist.mode(); RealType result = 0; // of checks. if(false == detail::check_triangular_mode(function, mode, &result, Policy())) @@ -466,11 +466,11 @@ namespace boost{ namespace math } // RealType mode template - inline RealType skewness(const triangular_distribution& dist) + inline BOOST_GPU_ENABLED RealType skewness(const triangular_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions using namespace boost::math::constants; // for root_two - static const char* function = "boost::math::skewness(const triangular_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::skewness(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType mode = dist.mode(); @@ -487,9 +487,9 @@ namespace boost{ namespace math } // RealType skewness(const triangular_distribution& dist) template - inline RealType kurtosis(const triangular_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis(const triangular_distribution& dist) { // These checks may be belt and braces as should have been checked on construction? 
- static const char* function = "boost::math::kurtosis(const triangular_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::kurtosis(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType upper = dist.upper(); RealType mode = dist.mode(); @@ -502,9 +502,9 @@ namespace boost{ namespace math } // RealType kurtosis_excess(const triangular_distribution& dist) template - inline RealType kurtosis_excess(const triangular_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis_excess(const triangular_distribution& dist) { // These checks may be belt and braces as should have been checked on construction? - static const char* function = "boost::math::kurtosis_excess(const triangular_distribution<%1%>&)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::kurtosis_excess(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType upper = dist.upper(); RealType mode = dist.mode(); diff --git a/include/boost/math/distributions/uniform.hpp b/include/boost/math/distributions/uniform.hpp index 856c144e36..c4dc77e783 100644 --- a/include/boost/math/distributions/uniform.hpp +++ b/include/boost/math/distributions/uniform.hpp @@ -26,7 +26,7 @@ namespace boost{ namespace math namespace detail { template - inline bool check_uniform_lower( + inline BOOST_GPU_ENABLED bool check_uniform_lower( const char* function, RealType lower, RealType* result, const Policy& pol) @@ -45,7 +45,7 @@ namespace boost{ namespace math } // bool check_uniform_lower( template - inline bool check_uniform_upper( + inline BOOST_GPU_ENABLED bool check_uniform_upper( const char* function, RealType upper, RealType* result, const Policy& pol) @@ -64,7 +64,7 @@ namespace boost{ namespace math } // bool check_uniform_upper( template - inline bool check_uniform_x( + inline BOOST_GPU_ENABLED bool check_uniform_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -83,7 +83,7 @@ namespace boost{ 
namespace math } // bool check_uniform_x template - inline bool check_uniform( + inline BOOST_GPU_ENABLED bool check_uniform( const char* function, RealType lower, RealType upper, @@ -116,19 +116,19 @@ namespace boost{ namespace math typedef RealType value_type; typedef Policy policy_type; - uniform_distribution(RealType l_lower = 0, RealType l_upper = 1) // Constructor. + BOOST_GPU_ENABLED uniform_distribution(RealType l_lower = 0, RealType l_upper = 1) // Constructor. : m_lower(l_lower), m_upper(l_upper) // Default is standard uniform distribution. { RealType result; detail::check_uniform("boost::math::uniform_distribution<%1%>::uniform_distribution", l_lower, l_upper, &result, Policy()); } // Accessor functions. - RealType lower()const + BOOST_GPU_ENABLED RealType lower()const { return m_lower; } - RealType upper()const + BOOST_GPU_ENABLED RealType upper()const { return m_upper; } @@ -157,7 +157,7 @@ namespace boost{ namespace math } template - inline RealType pdf(const uniform_distribution& dist, const RealType& x) + inline BOOST_GPU_ENABLED RealType pdf(const uniform_distribution& dist, const RealType& x) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -182,7 +182,7 @@ namespace boost{ namespace math } // RealType pdf(const uniform_distribution& dist, const RealType& x) template - inline RealType cdf(const uniform_distribution& dist, const RealType& x) + inline BOOST_GPU_ENABLED RealType cdf(const uniform_distribution& dist, const RealType& x) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -207,7 +207,7 @@ namespace boost{ namespace math } // RealType cdf(const uniform_distribution& dist, const RealType& x) template - inline RealType quantile(const uniform_distribution& dist, const RealType& p) + inline BOOST_GPU_ENABLED RealType quantile(const uniform_distribution& dist, const RealType& p) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -232,7 +232,7 @@ namespace boost{ namespace math } // RealType 
quantile(const uniform_distribution& dist, const RealType& p) template - inline RealType cdf(const complemented2_type, RealType>& c) + inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { RealType lower = c.dist.lower(); RealType upper = c.dist.upper(); @@ -258,7 +258,7 @@ namespace boost{ namespace math } // RealType cdf(const complemented2_type, RealType>& c) template - inline RealType quantile(const complemented2_type, RealType>& c) + inline BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { RealType lower = c.dist.lower(); RealType upper = c.dist.upper(); @@ -284,7 +284,7 @@ namespace boost{ namespace math } // RealType quantile(const complemented2_type, RealType>& c) template - inline RealType mean(const uniform_distribution& dist) + inline BOOST_GPU_ENABLED RealType mean(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -297,7 +297,7 @@ namespace boost{ namespace math } // RealType mean(const uniform_distribution& dist) template - inline RealType variance(const uniform_distribution& dist) + inline BOOST_GPU_ENABLED RealType variance(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -311,7 +311,7 @@ namespace boost{ namespace math } // RealType variance(const uniform_distribution& dist) template - inline RealType mode(const uniform_distribution& dist) + inline BOOST_GPU_ENABLED RealType mode(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -325,7 +325,7 @@ namespace boost{ namespace math } template - inline RealType median(const uniform_distribution& dist) + inline BOOST_GPU_ENABLED RealType median(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -337,7 +337,7 @@ namespace boost{ namespace math return (lower + upper) / 2; // } template - inline RealType skewness(const uniform_distribution& dist) + inline 
BOOST_GPU_ENABLED RealType skewness(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -350,7 +350,7 @@ namespace boost{ namespace math } // RealType skewness(const uniform_distribution& dist) template - inline RealType kurtosis_excess(const uniform_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis_excess(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -363,7 +363,7 @@ namespace boost{ namespace math } // RealType kurtosis_excess(const uniform_distribution& dist) template - inline RealType kurtosis(const uniform_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis(const uniform_distribution& dist) { return kurtosis_excess(dist) + 3; } diff --git a/include/boost/math/distributions/weibull.hpp b/include/boost/math/distributions/weibull.hpp index da1189090c..11ac291f3a 100644 --- a/include/boost/math/distributions/weibull.hpp +++ b/include/boost/math/distributions/weibull.hpp @@ -23,7 +23,7 @@ namespace boost{ namespace math namespace detail{ template -inline bool check_weibull_shape( +inline BOOST_GPU_ENABLED bool check_weibull_shape( const char* function, RealType shape, RealType* result, const Policy& pol) @@ -39,7 +39,7 @@ inline bool check_weibull_shape( } template -inline bool check_weibull_x( +inline BOOST_GPU_ENABLED bool check_weibull_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -55,7 +55,7 @@ inline bool check_weibull_x( } template -inline bool check_weibull( +inline BOOST_GPU_ENABLED bool check_weibull( const char* function, RealType scale, RealType shape, @@ -73,19 +73,19 @@ class weibull_distribution typedef RealType value_type; typedef Policy policy_type; - weibull_distribution(RealType l_shape, RealType l_scale = 1) + BOOST_GPU_ENABLED weibull_distribution(RealType l_shape, RealType l_scale = 1) : m_shape(l_shape), m_scale(l_scale) { RealType result; 
detail::check_weibull("boost::math::weibull_distribution<%1%>::weibull_distribution", l_scale, l_shape, &result, Policy()); } - RealType shape()const + BOOST_GPU_ENABLED RealType shape()const { return m_shape; } - RealType scale()const + BOOST_GPU_ENABLED RealType scale()const { return m_scale; } @@ -117,11 +117,11 @@ inline const std::pair support(const weibull_distribution -inline RealType pdf(const weibull_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType pdf(const weibull_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::pdf(const weibull_distribution<%1%>, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const weibull_distribution<%1%>, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -151,11 +151,11 @@ inline RealType pdf(const weibull_distribution& dist, const Re } template -inline RealType cdf(const weibull_distribution& dist, const RealType& x) +inline BOOST_GPU_ENABLED RealType cdf(const weibull_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const weibull_distribution<%1%>, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const weibull_distribution<%1%>, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -172,11 +172,11 @@ inline RealType cdf(const weibull_distribution& dist, const Re } template -inline RealType quantile(const weibull_distribution& dist, const RealType& p) +inline BOOST_GPU_ENABLED RealType quantile(const weibull_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const weibull_distribution<%1%>, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const weibull_distribution<%1%>, %1%)"; RealType shape = dist.shape(); RealType scale = 
dist.scale(); @@ -196,11 +196,11 @@ inline RealType quantile(const weibull_distribution& dist, con } template -inline RealType cdf(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const weibull_distribution<%1%>, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const weibull_distribution<%1%>, %1%)"; RealType shape = c.dist.shape(); RealType scale = c.dist.scale(); @@ -217,11 +217,11 @@ inline RealType cdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +inline BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const weibull_distribution<%1%>, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const weibull_distribution<%1%>, %1%)"; RealType shape = c.dist.shape(); RealType scale = c.dist.scale(); @@ -242,11 +242,11 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const weibull_distribution& dist) +inline BOOST_GPU_ENABLED RealType mean(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::mean(const weibull_distribution<%1%>)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mean(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -260,12 +260,12 @@ inline RealType mean(const weibull_distribution& dist) } template -inline RealType variance(const weibull_distribution& dist) +inline BOOST_GPU_ENABLED RealType variance(const weibull_distribution& dist) { RealType shape = dist.shape(); RealType scale = dist.scale(); - static const char* function = "boost::math::variance(const weibull_distribution<%1%>)"; + 
BOOST_MATH_GPU_STATIC const char* function = "boost::math::variance(const weibull_distribution<%1%>)"; RealType result = 0; if(false == detail::check_weibull(function, scale, shape, &result, Policy())) @@ -280,11 +280,11 @@ inline RealType variance(const weibull_distribution& dist) } template -inline RealType mode(const weibull_distribution& dist) +inline BOOST_GPU_ENABLED RealType mode(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::mode(const weibull_distribution<%1%>)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mode(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -301,11 +301,11 @@ inline RealType mode(const weibull_distribution& dist) } template -inline RealType median(const weibull_distribution& dist) +inline BOOST_GPU_ENABLED RealType median(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std function pow. 
- static const char* function = "boost::math::median(const weibull_distribution<%1%>)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::median(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); // Wikipedia k RealType scale = dist.scale(); // Wikipedia lambda @@ -321,11 +321,11 @@ inline RealType median(const weibull_distribution& dist) } template -inline RealType skewness(const weibull_distribution& dist) +inline BOOST_GPU_ENABLED RealType skewness(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::skewness(const weibull_distribution<%1%>)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::skewness(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -347,11 +347,11 @@ inline RealType skewness(const weibull_distribution& dist) } template -inline RealType kurtosis_excess(const weibull_distribution& dist) +inline BOOST_GPU_ENABLED RealType kurtosis_excess(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::kurtosis_excess(const weibull_distribution<%1%>)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::kurtosis_excess(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -377,7 +377,7 @@ inline RealType kurtosis_excess(const weibull_distribution& di } template -inline RealType kurtosis(const weibull_distribution& dist) +inline BOOST_GPU_ENABLED RealType kurtosis(const weibull_distribution& dist) { return kurtosis_excess(dist) + 3; } diff --git a/include/boost/math/policies/error_handling.hpp b/include/boost/math/policies/error_handling.hpp index 124337ee87..d23af3d43f 100644 --- a/include/boost/math/policies/error_handling.hpp +++ b/include/boost/math/policies/error_handling.hpp @@ -34,6 +34,10 @@ #endif #include +#ifdef __CUDACC__ +#include +#endif + namespace 
boost{ namespace math{ class evaluation_error : public std::runtime_error @@ -183,7 +187,7 @@ inline T raise_domain_error( } template -inline BOOST_MATH_CONSTEXPR T raise_domain_error( +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_domain_error( const char* , const char* , const T& , @@ -191,11 +195,15 @@ inline BOOST_MATH_CONSTEXPR T raise_domain_error( { // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: +#ifdef __CUDA_ARCH__ + return CUDART_INF_F; +#else return std::numeric_limits::quiet_NaN(); +#endif } template -inline T raise_domain_error( +inline BOOST_GPU_ENABLED T raise_domain_error( const char* , const char* , const T& , @@ -228,7 +236,7 @@ inline T raise_pole_error( } template -inline BOOST_MATH_CONSTEXPR T raise_pole_error( +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_pole_error( const char* function, const char* message, const T& val, @@ -282,18 +290,22 @@ inline T raise_overflow_error( } template -inline BOOST_MATH_CONSTEXPR T raise_overflow_error( +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_overflow_error( const char* , const char* , const ::boost::math::policies::overflow_error< ::boost::math::policies::ignore_error>&) BOOST_MATH_NOEXCEPT(T) { // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: +#ifdef __CUDA_ARCH__ + return CUDART_INF; +#else return std::numeric_limits::has_infinity ? 
std::numeric_limits::infinity() : boost::math::tools::max_value(); +#endif } template -inline BOOST_MATH_CONSTEXPR T raise_overflow_error( +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_overflow_error( const char* , const char* , const T&, @@ -301,7 +313,11 @@ inline BOOST_MATH_CONSTEXPR T raise_overflow_error( { // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: +#ifdef __CUDA_ARCH__ + return CUDART_INF; +#else return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); +#endif } template @@ -364,7 +380,7 @@ inline T raise_underflow_error( } template -inline BOOST_MATH_CONSTEXPR T raise_underflow_error( +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_underflow_error( const char* , const char* , const ::boost::math::policies::underflow_error< ::boost::math::policies::ignore_error>&) BOOST_MATH_NOEXCEPT(T) @@ -408,7 +424,7 @@ inline T raise_denorm_error( } template -inline BOOST_MATH_CONSTEXPR T raise_denorm_error( +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_denorm_error( const char* , const char* , const T& val, @@ -455,7 +471,7 @@ inline T raise_evaluation_error( } template -inline BOOST_MATH_CONSTEXPR T raise_evaluation_error( +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_evaluation_error( const char* , const char* , const T& val, @@ -503,7 +519,7 @@ inline TargetType raise_rounding_error( } template -inline BOOST_MATH_CONSTEXPR TargetType raise_rounding_error( +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR TargetType raise_rounding_error( const char* , const char* , const T& val, @@ -516,6 +532,59 @@ inline BOOST_MATH_CONSTEXPR TargetType raise_rounding_error( return val > 0 ? (std::numeric_limits::max)() : (std::numeric_limits::is_integer ? 
(std::numeric_limits::min)() : -(std::numeric_limits::max)()); } +#ifdef __CUDA_ARCH__ +template +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR float raise_rounding_error( + const char*, + const char*, + const T& val, + const float&, + const ::boost::math::policies::rounding_error< ::boost::math::policies::ignore_error>&) BOOST_MATH_NOEXCEPT(T) +{ + return val > 0 ? FLT_MAX : -FLT_MAX; +} +template +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR double raise_rounding_error( + const char*, + const char*, + const T& val, + const double&, + const ::boost::math::policies::rounding_error< ::boost::math::policies::ignore_error>&) BOOST_MATH_NOEXCEPT(T) +{ + return val > 0 ? DBL_MAX : -DBL_MAX; +} +template +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR int raise_rounding_error( + const char*, + const char*, + const T& val, + const int&, + const ::boost::math::policies::rounding_error< ::boost::math::policies::ignore_error>&) BOOST_MATH_NOEXCEPT(T) +{ + return val > 0 ? INT_MAX : INT_MIN; +} +template +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR long raise_rounding_error( + const char*, + const char*, + const T& val, + const long&, + const ::boost::math::policies::rounding_error< ::boost::math::policies::ignore_error>&) BOOST_MATH_NOEXCEPT(T) +{ + return val > 0 ? LONG_MAX : LONG_MIN; +} +template +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR long long raise_rounding_error( + const char*, + const char*, + const T& val, + const long long&, + const ::boost::math::policies::rounding_error< ::boost::math::policies::ignore_error>&) BOOST_MATH_NOEXCEPT(T) +{ + return val > 0 ? 
LLONG_MAX : LLONG_MIN; +} +#endif + template inline TargetType raise_rounding_error( const char* , @@ -570,7 +639,7 @@ inline T raise_indeterminate_result_error( } template -inline BOOST_MATH_CONSTEXPR T raise_indeterminate_result_error( +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_indeterminate_result_error( const char* , const char* , const T& , @@ -610,7 +679,7 @@ inline T raise_indeterminate_result_error( } // namespace detail template -inline BOOST_MATH_CONSTEXPR T raise_domain_error(const char* function, const char* message, const T& val, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_domain_error(const char* function, const char* message, const T& val, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::domain_error_type policy_type; return detail::raise_domain_error( @@ -619,7 +688,7 @@ inline BOOST_MATH_CONSTEXPR T raise_domain_error(const char* function, const cha } template -inline BOOST_MATH_CONSTEXPR T raise_pole_error(const char* function, const char* message, const T& val, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_pole_error(const char* function, const char* message, const T& val, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::pole_error_type policy_type; return detail::raise_pole_error( @@ -628,7 +697,7 @@ inline BOOST_MATH_CONSTEXPR T raise_pole_error(const char* function, const char* } template -inline BOOST_MATH_CONSTEXPR T raise_overflow_error(const char* function, const char* message, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_overflow_error(const char* function, const char* message, const 
Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::overflow_error_type policy_type; return detail::raise_overflow_error( @@ -637,7 +706,7 @@ inline BOOST_MATH_CONSTEXPR T raise_overflow_error(const char* function, const c } template -inline BOOST_MATH_CONSTEXPR T raise_overflow_error(const char* function, const char* message, const T& val, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_overflow_error(const char* function, const char* message, const T& val, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::overflow_error_type policy_type; return detail::raise_overflow_error( @@ -646,7 +715,7 @@ inline BOOST_MATH_CONSTEXPR T raise_overflow_error(const char* function, const c } template -inline BOOST_MATH_CONSTEXPR T raise_underflow_error(const char* function, const char* message, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_underflow_error(const char* function, const char* message, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::underflow_error_type policy_type; return detail::raise_underflow_error( @@ -655,7 +724,7 @@ inline BOOST_MATH_CONSTEXPR T raise_underflow_error(const char* function, const } template -inline BOOST_MATH_CONSTEXPR T raise_denorm_error(const char* function, const char* message, const T& val, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_denorm_error(const char* function, const char* message, const T& val, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::denorm_error_type 
policy_type; return detail::raise_denorm_error( @@ -665,7 +734,7 @@ inline BOOST_MATH_CONSTEXPR T raise_denorm_error(const char* function, const cha } template -inline BOOST_MATH_CONSTEXPR T raise_evaluation_error(const char* function, const char* message, const T& val, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_evaluation_error(const char* function, const char* message, const T& val, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::evaluation_error_type policy_type; return detail::raise_evaluation_error( @@ -674,7 +743,7 @@ inline BOOST_MATH_CONSTEXPR T raise_evaluation_error(const char* function, const } template -inline BOOST_MATH_CONSTEXPR TargetType raise_rounding_error(const char* function, const char* message, const T& val, const TargetType& t, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR TargetType raise_rounding_error(const char* function, const char* message, const T& val, const TargetType& t, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::rounding_error_type policy_type; return detail::raise_rounding_error( @@ -683,7 +752,7 @@ inline BOOST_MATH_CONSTEXPR TargetType raise_rounding_error(const char* function } template -inline BOOST_MATH_CONSTEXPR T raise_indeterminate_result_error(const char* function, const char* message, const T& val, const R& result, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR T raise_indeterminate_result_error(const char* function, const char* message, const T& val, const R& result, const Policy&) BOOST_NOEXCEPT_IF(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename 
Policy::indeterminate_result_error_type policy_type; return detail::raise_indeterminate_result_error( @@ -763,28 +832,28 @@ inline bool check_denorm(std::complex val, R* result, const char* function, c // Default instantiations with ignore_error policy. template -inline BOOST_MATH_CONSTEXPR bool check_overflow(T /* val */, R* /* result */, const char* /* function */, const overflow_error&) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR bool check_overflow(T /* val */, R* /* result */, const char* /* function */, const overflow_error&) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -inline BOOST_MATH_CONSTEXPR bool check_overflow(std::complex /* val */, R* /* result */, const char* /* function */, const overflow_error&) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR bool check_overflow(std::complex /* val */, R* /* result */, const char* /* function */, const overflow_error&) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -inline BOOST_MATH_CONSTEXPR bool check_underflow(T /* val */, R* /* result */, const char* /* function */, const underflow_error&) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR bool check_underflow(T /* val */, R* /* result */, const char* /* function */, const underflow_error&) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -inline BOOST_MATH_CONSTEXPR bool check_underflow(std::complex /* val */, R* /* result */, const char* /* function */, const underflow_error&) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR bool check_underflow(std::complex /* val */, R* /* result */, const char* /* function */, const underflow_error&) 
BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -inline BOOST_MATH_CONSTEXPR bool check_denorm(T /* val */, R* /* result*/, const char* /* function */, const denorm_error&) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR bool check_denorm(T /* val */, R* /* result*/, const char* /* function */, const denorm_error&) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -inline BOOST_MATH_CONSTEXPR bool check_denorm(std::complex /* val */, R* /* result*/, const char* /* function */, const denorm_error&) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR bool check_denorm(std::complex /* val */, R* /* result*/, const char* /* function */, const denorm_error&) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } } // namespace detail template -inline R checked_narrowing_cast(T val, const char* function) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy::value) +inline BOOST_GPU_ENABLED R checked_narrowing_cast(T val, const char* function) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy::value) { typedef typename Policy::overflow_error_type overflow_type; typedef typename Policy::underflow_error_type underflow_type; @@ -804,7 +873,7 @@ inline R checked_narrowing_cast(T val, const char* function) BOOST_NOEXCEPT_IF(B } template -inline void check_series_iterations(const char* function, boost::uintmax_t max_iter, const Policy& pol) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy::value) +inline BOOST_GPU_ENABLED void check_series_iterations(const char* function, boost::uintmax_t max_iter, const Policy& pol) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy::value) { if(max_iter >= 
policies::get_max_series_iterations()) raise_evaluation_error( @@ -813,7 +882,7 @@ inline void check_series_iterations(const char* function, boost::uintmax_t max_i } template -inline void check_root_iterations(const char* function, boost::uintmax_t max_iter, const Policy& pol) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy::value) +inline BOOST_GPU_ENABLED void check_root_iterations(const char* function, boost::uintmax_t max_iter, const Policy& pol) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy::value) { if(max_iter >= policies::get_max_root_iterations()) raise_evaluation_error( diff --git a/include/boost/math/policies/policy.hpp b/include/boost/math/policies/policy.hpp index c1e1a7be4a..a4b7149f04 100644 --- a/include/boost/math/policies/policy.hpp +++ b/include/boost/math/policies/policy.hpp @@ -33,9 +33,9 @@ namespace boost{ namespace math{ namespace tools{ template -BOOST_MATH_CONSTEXPR int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_NOEXCEPT; +BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_NOEXCEPT; template -BOOST_MATH_CONSTEXPR T epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T); +BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T); } @@ -46,7 +46,7 @@ namespace policies{ // // Special cases for exceptions disabled first: // -#ifdef BOOST_NO_EXCEPTIONS +#if defined(BOOST_NO_EXCEPTIONS) # ifndef BOOST_MATH_DOMAIN_ERROR_POLICY # define BOOST_MATH_DOMAIN_ERROR_POLICY errno_on_error # endif @@ -63,6 +63,35 @@ namespace policies{ # define BOOST_MATH_ROUNDING_ERROR_POLICY errno_on_error # endif #endif + + // + // Special cases for CUDA devices: + // +#ifdef __CUDA_ARCH__ +# ifndef BOOST_MATH_DOMAIN_ERROR_POLICY +# define BOOST_MATH_DOMAIN_ERROR_POLICY ignore_error +# endif +# ifndef BOOST_MATH_POLE_ERROR_POLICY +# define BOOST_MATH_POLE_ERROR_POLICY ignore_error +# endif +# ifndef 
BOOST_MATH_OVERFLOW_ERROR_POLICY +# define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +# endif +# ifndef BOOST_MATH_EVALUATION_ERROR_POLICY +# define BOOST_MATH_EVALUATION_ERROR_POLICY ignore_error +# endif +# ifndef BOOST_MATH_ROUNDING_ERROR_POLICY +# define BOOST_MATH_ROUNDING_ERROR_POLICY ignore_error +# endif +# ifndef BOOST_MATH_PROMOTE_FLOAT_POLICY +# define BOOST_MATH_PROMOTE_FLOAT_POLICY false +# endif +# ifndef BOOST_MATH_PROMOTE_DOUBLE_POLICY +# define BOOST_MATH_PROMOTE_DOUBLE_POLICY false +# endif +#endif + + // // Then the regular cases: // @@ -378,8 +407,12 @@ struct default_args typedef default_policy arg2; }; +template struct fake_default_arg { typedef default_policy type; }; +template <> struct fake_default_arg { typedef digits10<0> type; }; + typedef default_args::arg1 forwarding_arg1; typedef default_args::arg2 forwarding_arg2; +typedef fake_default_arg::type fake_default_arg_placeholder; } // detail // @@ -490,7 +523,7 @@ struct policy -struct policy +struct policy { public: typedef domain_error<> domain_error_type; @@ -639,7 +672,7 @@ struct normalise, default_policy, default_policy, default_policy, - default_policy> + detail::fake_default_arg_placeholder> { typedef policy type; }; @@ -847,19 +880,19 @@ struct precision namespace detail{ template -inline BOOST_MATH_CONSTEXPR int digits_imp(mpl::true_ const&) BOOST_NOEXCEPT +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED int digits_imp(mpl::true_ const&) BOOST_NOEXCEPT { #ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS BOOST_STATIC_ASSERT( ::std::numeric_limits::is_specialized); #else - BOOST_ASSERT(::std::numeric_limits::is_specialized); + BOOST_MATH_ASSERT(::std::numeric_limits::is_specialized); #endif typedef typename boost::math::policies::precision::type p_t; return p_t::value; } template -inline BOOST_MATH_CONSTEXPR int digits_imp(mpl::false_ const&) BOOST_NOEXCEPT +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED int digits_imp(mpl::false_ const&) BOOST_NOEXCEPT { return tools::digits(); } 
@@ -867,26 +900,26 @@ inline BOOST_MATH_CONSTEXPR int digits_imp(mpl::false_ const&) BOOST_NOEXCEPT } // namespace detail template -inline BOOST_MATH_CONSTEXPR int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_NOEXCEPT +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_NOEXCEPT { typedef mpl::bool_< std::numeric_limits::is_specialized > tag_type; return detail::digits_imp(tag_type()); } template -inline BOOST_MATH_CONSTEXPR int digits_base10(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_NOEXCEPT +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED int digits_base10(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_NOEXCEPT { return boost::math::policies::digits() * 301 / 1000L; } template -inline BOOST_MATH_CONSTEXPR unsigned long get_max_series_iterations() BOOST_NOEXCEPT +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED unsigned long get_max_series_iterations() BOOST_NOEXCEPT { typedef typename Policy::max_series_iterations_type iter_type; return iter_type::value; } template -inline BOOST_MATH_CONSTEXPR unsigned long get_max_root_iterations() BOOST_NOEXCEPT +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED unsigned long get_max_root_iterations() BOOST_NOEXCEPT { typedef typename Policy::max_root_iterations_type iter_type; return iter_type::value; @@ -897,7 +930,7 @@ namespace detail{ template struct series_factor_calc { - static T get() BOOST_MATH_NOEXCEPT(T) + static BOOST_GPU_ENABLED T get() BOOST_MATH_NOEXCEPT(T) { return ldexp(T(1.0), 1 - Digits::value); } @@ -906,7 +939,7 @@ struct series_factor_calc template struct series_factor_calc { - static BOOST_MATH_CONSTEXPR T get() BOOST_MATH_NOEXCEPT(T) + static BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T get() BOOST_MATH_NOEXCEPT(T) { return boost::math::tools::epsilon(); } @@ -914,7 +947,7 @@ struct series_factor_calc template struct series_factor_calc { - static BOOST_MATH_CONSTEXPR T get() BOOST_MATH_NOEXCEPT(T) + static BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T get() 
BOOST_MATH_NOEXCEPT(T) { return 1 / static_cast(static_cast(1u) << (Digits::value - 1)); } @@ -922,21 +955,21 @@ struct series_factor_calc template struct series_factor_calc { - static BOOST_MATH_CONSTEXPR T get() BOOST_MATH_NOEXCEPT(T) + static BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T get() BOOST_MATH_NOEXCEPT(T) { return boost::math::tools::epsilon(); } }; template -inline BOOST_MATH_CONSTEXPR T get_epsilon_imp(mpl::true_ const&) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T get_epsilon_imp(mpl::true_ const&) BOOST_MATH_NOEXCEPT(T) { #ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS BOOST_STATIC_ASSERT( ::std::numeric_limits::is_specialized); BOOST_STATIC_ASSERT( ::std::numeric_limits::radix == 2); #else - BOOST_ASSERT(::std::numeric_limits::is_specialized); - BOOST_ASSERT(::std::numeric_limits::radix == 2); + BOOST_MATH_ASSERT(::std::numeric_limits::is_specialized); + BOOST_MATH_ASSERT(::std::numeric_limits::radix == 2); #endif typedef typename boost::math::policies::precision::type p_t; typedef mpl::bool_::digits> is_small_int; @@ -945,7 +978,7 @@ inline BOOST_MATH_CONSTEXPR T get_epsilon_imp(mpl::true_ const&) BOOST_MATH_NOEX } template -inline BOOST_MATH_CONSTEXPR T get_epsilon_imp(mpl::false_ const&) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T get_epsilon_imp(mpl::false_ const&) BOOST_MATH_NOEXCEPT(T) { return tools::epsilon(); } @@ -953,7 +986,7 @@ inline BOOST_MATH_CONSTEXPR T get_epsilon_imp(mpl::false_ const&) BOOST_MATH_NOE } // namespace detail template -inline BOOST_MATH_CONSTEXPR T get_epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T get_epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) { typedef mpl::bool_< (std::numeric_limits::is_specialized && (std::numeric_limits::radix == 2)) > tag_type; return detail::get_epsilon_imp(tag_type()); diff --git a/include/boost/math/quadrature/cuda_naive_monte_carlo.hpp 
b/include/boost/math/quadrature/cuda_naive_monte_carlo.hpp new file mode 100644 index 0000000000..37258ea9de --- /dev/null +++ b/include/boost/math/quadrature/cuda_naive_monte_carlo.hpp @@ -0,0 +1,239 @@ +/* + * Copyright Nick Thompson, 2018 + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ +#ifndef BOOST_MATH_QUADRATURE_NAIVE_MONTE_CARLO_HPP +#define BOOST_MATH_QUADRATURE_NAIVE_MONTE_CARLO_HPP +#include +#include +#include +#include +#include +#include + // For the CUDA runtime routines (prefixed with "cuda_") +#include +#include +#include +#include + +namespace boost { namespace math { namespace quadrature { namespace detail{ + + template + struct pair_add + { + __host__ __device__ thrust::pair operator()(const thrust::pair& a, const thrust::pair& b)const + { + return thrust::pair(a.first + b.first, a.second + b.second); + } + }; + + template + void __global__ cuda_naive_monte_carlo_device_proc(F f, Gen* seeds, thrust::pair* sums, const Real* p_start_locations, const Real* p_scales, unsigned n_calls, Real* p_storage, unsigned n_dimentions, bool is_first) + { + int id = blockIdx.x * blockDim.x + threadIdx.x; + + Gen gen(seeds[id]); + Real* storage_base = p_storage + id * n_dimentions; + thrust::uniform_real_distribution dist(0, 1); + Real sum(0); + Real sigma_squared(0); + Real c(0); + for (unsigned i = 0; i < n_calls; ++i) + { + for (unsigned j = 0; j < n_dimentions; ++j) + storage_base[j] = p_start_locations[j] + p_scales[j] * dist(gen); + Real fv = (f)(storage_base); + Real y = fv - c; + Real t = sum + y; + c = (t - sum) - y; + sum = t; + sigma_squared += fv * fv; + } + seeds[id] = gen; + if (is_first) + sums[id] = thrust::pair(sum, sigma_squared); + else + { + sums[id].first += sum; + sums[id].second += sigma_squared; + } + } + + template + void __global__ cuda_naive_monte_carlo_fast_device_proc(F f, Gen* seeds, 
thrust::pair* sums, const Real* p_start_locations, const Real* p_scales, unsigned n_calls, Real* p_storage, unsigned n_dimentions, bool is_first) + { + int id = blockIdx.x * blockDim.x + threadIdx.x; + + Gen gen(seeds[id]); + Real* storage_base = p_storage + id * n_dimentions; + thrust::uniform_real_distribution dist(0, 1); + Real sum(0); + Real sigma_squared(0); + for (unsigned i = 0; i < n_calls; ++i) + { + for (unsigned j = 0; j < n_dimentions; ++j) + storage_base[j] = p_start_locations[j] + p_scales[j] * dist(gen); + Real fv = (f)(storage_base); + sum += fv; + sigma_squared += fv * fv; + } + seeds[id] = gen; + if (is_first) + sums[id] = thrust::pair(sum, sigma_squared); + else + { + sums[id].first += sum; + sums[id].second += sigma_squared; + } + } + +} // namespace detail + + template + struct cuda_naive_monte_carlo + { + cuda_naive_monte_carlo(const F& integrand, + std::vector> const & bounds, + const MasterGen& seed) : m_f(integrand), m_gen(seed), m_volume(1), m_sigma(0), m_sigma_squares(0), m_calls(0) + { + auto it = bounds.begin(); + while (it != bounds.end()) + { + m_start_locations.push_back(it->first); + m_scale_factors.push_back(it->second - it->first); + m_volume *= (it->second - it->first); + ++it; + } + } + cuda_naive_monte_carlo(const F& integrand, + std::vector> const & bounds) + : m_f(integrand), m_volume(1), m_sigma(0), m_sigma_squares(0), m_calls(0) + { + auto it = bounds.begin(); + while (it != bounds.end()) + { + m_start_locations.push_back(it->first); + m_scale_factors.push_back(it->second - it->first); + m_volume *= (it->second - it->first); + ++it; + } + } + + template + static T* to_pointer(T* p) { return p; } + template + static T* to_pointer(thrust::device_ptr p) { return p.get(); } + + Real integrate(Real error_request, boost::uintmax_t calls_per_thread = 1024, boost::uintmax_t max_calls_per_thread = 250000, bool is_compensated = true) + { + boost::uintmax_t threads; + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 
0); + + threads = deviceProp.maxThreadsPerMultiProcessor * deviceProp.multiProcessorCount; + + thrust::device_vector starts = m_start_locations; + thrust::device_vector scales = m_scale_factors; + thrust::device_vector storage(threads * m_start_locations.size()); + thrust::device_vector > sums(threads); + thrust::device_vector seeds(threads); + thrust::host_vector host_seeds(threads); + typedef typename ThreadGen::result_type seed_type; + std::uniform_int_distribution ui_dist((std::numeric_limits::min)(), (std::numeric_limits::max)()); + for (unsigned i = 0; i < host_seeds.size(); ++i) + host_seeds[i].seed(ui_dist(m_gen)); + seeds = host_seeds; + bool first_call = true; + bool have_variance = false; + Real sample_variance = 0; + do { + if (m_calls) + { + // If we've been called before, adjust calls_per_thread according + // to the sample variance: + Real err = sqrt(variance() / m_calls); + boost::uintmax_t target_total = variance() > 0 ? (boost::uintmax_t)std::floor(1.05 * m_calls * err * err / (error_request * error_request)) : 2 * m_calls; + calls_per_thread = 1 + target_total / threads; + have_variance = true; + sample_variance = variance(); + } + do + { + if (!first_call) + { + // Update how many calls per thread to try based on our variance estimate: + boost::uintmax_t target_total; + if (sample_variance > 0) + target_total = static_cast(1.05 * m_calls * boost::math::pow<2>(sqrt(sample_variance / m_calls)) / (error_request * error_request) - m_calls); + else + target_total = 2 * m_calls; + calls_per_thread = 1 + target_total / threads; + } + if (calls_per_thread > max_calls_per_thread) + calls_per_thread = max_calls_per_thread; + // std::cout << "Executing with calls_per_thread = " << calls_per_thread << std::endl; + if(is_compensated) + detail::cuda_naive_monte_carlo_device_proc << > > (m_f, to_pointer(seeds.data()), to_pointer(sums.data()), to_pointer(starts.data()), to_pointer(scales.data()), calls_per_thread, to_pointer(storage.data()), scales.size(), 
first_call); + else + detail::cuda_naive_monte_carlo_fast_device_proc << > > (m_f, to_pointer(seeds.data()), to_pointer(sums.data()), to_pointer(starts.data()), to_pointer(scales.data()), calls_per_thread, to_pointer(storage.data()), scales.size(), first_call); + first_call = false; + m_calls += threads * calls_per_thread; + // If we haven't been called before then get an estimate of the sample + // variance based on the first sum, this removes the need to reduce + // the whole thing which is relatively expensive: + if (!have_variance) + { + std::pair first_result; + cudaMemcpy(&first_result, to_pointer(sums.data()), sizeof(first_result), cudaMemcpyDeviceToHost); + // It's tempting to use calls_per_thread as the sample size for the sums: but we may + // have been through this loop more than once and calls_per_thread is just the "extra" + // calls from the last run through. So divide the total number of calls by the number + // of threads to get the actual sample size for each sub-sum: + sample_variance = m_volume * m_volume * (first_result.second - first_result.first * first_result.first / (m_calls / threads)) / ((m_calls / threads) - 1); + } + // std::cout << "Estimated error = " << sqrt(sample_variance / m_calls) << std::endl; + } while ((sample_variance < 0) || (sqrt(sample_variance / m_calls) > error_request)); + + // Reduce the results: + thrust::pair sum = thrust::reduce(sums.begin(), sums.end(), thrust::pair(0, 0), detail::pair_add()); + m_sigma += sum.first; + m_sigma_squares += sum.second; + first_call = true; + // + // Since we used an estimate for the variance in the code above, now that + // we have the true variance after a full reduction, we had better double check + // the termination condition: + // + // std::cout << "Estimated error after reduce = " << sqrt(variance() / m_calls) << std::endl; + } while ((variance() < 0) || (sqrt(variance() / m_calls) > error_request)); + + return m_volume * m_sigma / m_calls; + } + + Real variance()const + { + 
return m_volume * m_volume * (m_sigma_squares - m_sigma * m_sigma / m_calls) / (m_calls - 1); + } + Real current_error_estimate() const + { + using std::sqrt; + return sqrt(variance() / m_calls); + } + uint64_t calls() const + { + return m_calls; + } + + private: + MasterGen m_gen; + Real m_volume, m_sigma, m_sigma_squares; + boost::uintmax_t m_calls; + F m_f; + thrust::host_vector m_start_locations; + thrust::host_vector m_scale_factors; + }; + +}}} +#endif diff --git a/include/boost/math/special_functions/acosh.hpp b/include/boost/math/special_functions/acosh.hpp index 3b7edbad3f..34b514cef7 100644 --- a/include/boost/math/special_functions/acosh.hpp +++ b/include/boost/math/special_functions/acosh.hpp @@ -32,7 +32,7 @@ namespace boost namespace detail { template - inline T acosh_imp(const T x, const Policy& pol) + inline BOOST_GPU_ENABLED T acosh_imp(const T x, const Policy& pol) { BOOST_MATH_STD_USING @@ -76,7 +76,7 @@ namespace boost } template - inline typename tools::promote_args::type acosh(T x, const Policy&) + inline BOOST_GPU_ENABLED typename tools::promote_args::type acosh(T x, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -91,7 +91,7 @@ namespace boost "boost::math::acosh<%1%>(%1%)"); } template - inline typename tools::promote_args::type acosh(T x) + inline BOOST_GPU_ENABLED typename tools::promote_args::type acosh(T x) { return boost::math::acosh(x, policies::policy<>()); } diff --git a/include/boost/math/special_functions/asinh.hpp b/include/boost/math/special_functions/asinh.hpp index e55a356284..abb74f9d44 100644 --- a/include/boost/math/special_functions/asinh.hpp +++ b/include/boost/math/special_functions/asinh.hpp @@ -32,7 +32,7 @@ namespace boost { namespace detail{ template - inline T asinh_imp(const T x, const Policy& pol) + inline BOOST_GPU_ENABLED T asinh_imp(const T x, const Policy& pol) { BOOST_MATH_STD_USING @@ -85,12 +85,12 @@ namespace boost } 
template - inline typename tools::promote_args::type asinh(T x) + inline BOOST_GPU_ENABLED typename tools::promote_args::type asinh(T x) { return boost::math::asinh(x, policies::policy<>()); } template - inline typename tools::promote_args::type asinh(T x, const Policy&) + inline BOOST_GPU_ENABLED typename tools::promote_args::type asinh(T x, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; diff --git a/include/boost/math/special_functions/atanh.hpp b/include/boost/math/special_functions/atanh.hpp index 871a465a8c..977e07ae48 100644 --- a/include/boost/math/special_functions/atanh.hpp +++ b/include/boost/math/special_functions/atanh.hpp @@ -34,7 +34,7 @@ namespace boost // This is the main fare template - inline T atanh_imp(const T x, const Policy& pol) + inline BOOST_GPU_ENABLED T atanh_imp(const T x, const Policy& pol) { BOOST_MATH_STD_USING static const char* function = "boost::math::atanh<%1%>(%1%)"; @@ -94,7 +94,7 @@ namespace boost } template - inline typename tools::promote_args::type atanh(T x, const Policy&) + inline BOOST_GPU_ENABLED typename tools::promote_args::type atanh(T x, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -109,7 +109,7 @@ namespace boost "boost::math::atanh<%1%>(%1%)"); } template - inline typename tools::promote_args::type atanh(T x) + inline BOOST_GPU_ENABLED typename tools::promote_args::type atanh(T x) { return boost::math::atanh(x, policies::policy<>()); } diff --git a/include/boost/math/special_functions/beta.hpp b/include/boost/math/special_functions/beta.hpp index 35b114ef15..5902db2c76 100644 --- a/include/boost/math/special_functions/beta.hpp +++ b/include/boost/math/special_functions/beta.hpp @@ -31,7 +31,7 @@ namespace detail{ // Implementation of Beta(a,b) using the Lanczos approximation: // template -T beta_imp(T a, T b, const Lanczos&, const Policy& pol) 
+BOOST_GPU_ENABLED T beta_imp(T a, T b, const Lanczos&, const Policy& pol) { BOOST_MATH_STD_USING // for ADL of std names @@ -83,9 +83,10 @@ T beta_imp(T a, T b, const Lanczos&, const Policy& pol) b += 1; } */ + using std::swap; if(a < b) - std::swap(a, b); + BOOST_MATH_CUDA_SAFE_SWAP(a, b); // Lanczos calculation: T agh = static_cast(a + Lanczos::g() - 0.5f); @@ -199,7 +200,7 @@ T beta_imp(T a, T b, const lanczos::undefined_lanczos& /* l */, const Policy& po // horrendous cancellation errors. // template -T ibeta_power_terms(T a, +BOOST_GPU_ENABLED T ibeta_power_terms(T a, T b, T x, T y, @@ -234,11 +235,11 @@ T ibeta_power_terms(T a, // l1 and l2 are the base of the exponents minus one: T l1 = (x * b - y * agh) / agh; T l2 = (y * a - x * bgh) / bgh; - if(((std::min)(fabs(l1), fabs(l2)) < 0.2)) + if((BOOST_MATH_CUDA_SAFE_MIN(fabs(l1), fabs(l2)) < 0.2)) { // when the base of the exponent is very near 1 we get really // gross errors unless extra care is taken: - if((l1 * l2 > 0) || ((std::min)(a, b) < 1)) + if((l1 * l2 > 0) || (BOOST_MATH_CUDA_SAFE_MIN(a, b) < 1)) { // // This first branch handles the simple cases where either: @@ -274,7 +275,7 @@ T ibeta_power_terms(T a, BOOST_MATH_INSTRUMENT_VARIABLE(result); } } - else if((std::max)(fabs(l1), fabs(l2)) < 0.5) + else if(BOOST_MATH_CUDA_SAFE_MAX(fabs(l1), fabs(l2)) < 0.5) { // // Both exponents are near one and both the exponents are @@ -508,8 +509,8 @@ template struct ibeta_series_t { typedef T result_type; - ibeta_series_t(T a_, T b_, T x_, T mult) : result(mult), x(x_), apn(a_), poch(1-b_), n(1) {} - T operator()() + BOOST_GPU_ENABLED ibeta_series_t(T a_, T b_, T x_, T mult) : result(mult), x(x_), apn(a_), poch(1-b_), n(1) {} + BOOST_GPU_ENABLED T operator()() { T r = result / apn; apn += 1; @@ -524,13 +525,13 @@ struct ibeta_series_t }; template -T ibeta_series(T a, T b, T x, T s0, const Lanczos&, bool normalised, T* p_derivative, T y, const Policy& pol) +BOOST_GPU_ENABLED T ibeta_series(T a, T b, T x, T s0, 
const Lanczos&, bool normalised, T* p_derivative, T y, const Policy& pol) { BOOST_MATH_STD_USING T result; - BOOST_ASSERT((p_derivative == 0) || normalised); + BOOST_MATH_ASSERT((p_derivative == 0) || normalised); if(normalised) { @@ -562,7 +563,7 @@ T ibeta_series(T a, T b, T x, T s0, const Lanczos&, bool normalised, T* p_deriva if(p_derivative) { *p_derivative = result * pow(y, b); - BOOST_ASSERT(*p_derivative >= 0); + BOOST_MATH_ASSERT(*p_derivative >= 0); } } else @@ -598,7 +599,7 @@ T ibeta_series(T a, T b, T x, T s0, const boost::math::lanczos::undefined_lanczo BOOST_MATH_STD_USING T result; - BOOST_ASSERT((p_derivative == 0) || normalised); + BOOST_MATH_ASSERT((p_derivative == 0) || normalised); if(normalised) { @@ -652,7 +653,7 @@ T ibeta_series(T a, T b, T x, T s0, const boost::math::lanczos::undefined_lanczo if(p_derivative) { *p_derivative = result * pow(y, b); - BOOST_ASSERT(*p_derivative >= 0); + BOOST_MATH_ASSERT(*p_derivative >= 0); } } else @@ -677,9 +678,9 @@ struct ibeta_fraction2_t { typedef std::pair result_type; - ibeta_fraction2_t(T a_, T b_, T x_, T y_) : a(a_), b(b_), x(x_), y(y_), m(0) {} + BOOST_GPU_ENABLED ibeta_fraction2_t(T a_, T b_, T x_, T y_) : a(a_), b(b_), x(x_), y(y_), m(0) {} - result_type operator()() + BOOST_GPU_ENABLED result_type operator()() { T aN = (a + m - 1) * (a + b + m - 1) * m * (b - m) * x * x; T denom = (a + 2 * m - 1); @@ -702,7 +703,7 @@ struct ibeta_fraction2_t // Evaluate the incomplete beta via the continued fraction representation: // template -inline T ibeta_fraction2(T a, T b, T x, T y, const Policy& pol, bool normalised, T* p_derivative) +inline BOOST_GPU_ENABLED T ibeta_fraction2(T a, T b, T x, T y, const Policy& pol, bool normalised, T* p_derivative) { typedef typename lanczos::lanczos::type lanczos_type; BOOST_MATH_STD_USING @@ -710,7 +711,7 @@ inline T ibeta_fraction2(T a, T b, T x, T y, const Policy& pol, bool normalised, if(p_derivative) { *p_derivative = result; - BOOST_ASSERT(*p_derivative >= 0); + 
BOOST_MATH_ASSERT(*p_derivative >= 0); } if(result == 0) return result; @@ -725,7 +726,7 @@ inline T ibeta_fraction2(T a, T b, T x, T y, const Policy& pol, bool normalised, // Computes the difference between ibeta(a,b,x) and ibeta(a+k,b,x): // template -T ibeta_a_step(T a, T b, T x, T y, int k, const Policy& pol, bool normalised, T* p_derivative) +BOOST_GPU_ENABLED T ibeta_a_step(T a, T b, T x, T y, int k, const Policy& pol, bool normalised, T* p_derivative) { typedef typename lanczos::lanczos::type lanczos_type; @@ -735,7 +736,7 @@ T ibeta_a_step(T a, T b, T x, T y, int k, const Policy& pol, bool normalised, T* if(p_derivative) { *p_derivative = prefix; - BOOST_ASSERT(*p_derivative >= 0); + BOOST_MATH_ASSERT(*p_derivative >= 0); } prefix /= a; if(prefix == 0) @@ -759,7 +760,7 @@ T ibeta_a_step(T a, T b, T x, T y, int k, const Policy& pol, bool normalised, T* // it is currently only called for small k. // template -inline T rising_factorial_ratio(T a, T b, int k) +inline BOOST_GPU_ENABLED T rising_factorial_ratio(T a, T b, int k) { // calculate: // (a)(a+1)(a+2)...(a+k-1) @@ -803,17 +804,21 @@ template <> struct Pn_size { BOOST_STATIC_CONSTANT(unsigned, value = 30); // 16-20 digit accuracy +#ifndef __CUDA_ARCH__ BOOST_STATIC_ASSERT(::boost::math::max_factorial::value >= 60); +#endif }; template <> struct Pn_size { BOOST_STATIC_CONSTANT(unsigned, value = 50); // ~35-50 digit accuracy +#ifndef __CUDA_ARCH__ BOOST_STATIC_ASSERT(::boost::math::max_factorial::value >= 100); +#endif }; template -T beta_small_b_large_a_series(T a, T b, T x, T y, T s0, T mult, const Policy& pol, bool normalised) +BOOST_GPU_ENABLED T beta_small_b_large_a_series(T a, T b, T x, T y, T s0, T mult, const Policy& pol, bool normalised) { typedef typename lanczos::lanczos::type lanczos_type; BOOST_MATH_STD_USING @@ -925,7 +930,7 @@ T beta_small_b_large_a_series(T a, T b, T x, T y, T s0, T mult, const Policy& po // complement of the binomial distribution cdf and use this finite sum. 
// template -T binomial_ccdf(T n, T k, T x, T y) +BOOST_GPU_ENABLED T binomial_ccdf(T n, T k, T x, T y) { BOOST_MATH_STD_USING // ADL of std names @@ -986,7 +991,7 @@ T binomial_ccdf(T n, T k, T x, T y) // each domain: // template -T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_derivative) +BOOST_GPU_ENABLED T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_derivative) { static const char* function = "boost::math::ibeta<%1%>(%1%, %1%, %1%)"; typedef typename lanczos::lanczos::type lanczos_type; @@ -1002,7 +1007,7 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de T fract; T y = 1 - x; - BOOST_ASSERT((p_derivative == 0) || normalised); + BOOST_MATH_ASSERT((p_derivative == 0) || normalised); if(p_derivative) *p_derivative = -1; // value not set. @@ -1098,19 +1103,19 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de return p; } - if((std::min)(a, b) <= 1) + if(BOOST_MATH_CUDA_SAFE_MIN(a, b) <= 1) { if(x > 0.5) { - std::swap(a, b); - std::swap(x, y); + BOOST_MATH_CUDA_SAFE_SWAP(a, b); + BOOST_MATH_CUDA_SAFE_SWAP(x, y); invert = !invert; BOOST_MATH_INSTRUMENT_VARIABLE(invert); } - if((std::max)(a, b) <= 1) + if(BOOST_MATH_CUDA_SAFE_MAX(a, b) <= 1) { // Both a,b < 1: - if((a >= (std::min)(T(0.2), b)) || (pow(x, a) <= 0.9)) + if((a >= BOOST_MATH_CUDA_SAFE_MIN(T(0.2), b)) || (pow(x, a) <= 0.9)) { if(!invert) { @@ -1127,8 +1132,8 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de } else { - std::swap(a, b); - std::swap(x, y); + BOOST_MATH_CUDA_SAFE_SWAP(a, b); + BOOST_MATH_CUDA_SAFE_SWAP(x, y); invert = !invert; if(y >= 0.3) { @@ -1193,8 +1198,8 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de } else { - std::swap(a, b); - std::swap(x, y); + BOOST_MATH_CUDA_SAFE_SWAP(a, b); + BOOST_MATH_CUDA_SAFE_SWAP(x, y); invert = !invert; if(y >= 0.3) @@ -1271,8 +1276,8 @@ T ibeta_imp(T a, T b, T 
x, const Policy& pol, bool inv, bool normalised, T* p_de } if(lambda < 0) { - std::swap(a, b); - std::swap(x, y); + BOOST_MATH_CUDA_SAFE_SWAP(a, b); + BOOST_MATH_CUDA_SAFE_SWAP(x, y); invert = !invert; BOOST_MATH_INSTRUMENT_VARIABLE(invert); } @@ -1386,15 +1391,15 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de } // template T ibeta_imp(T a, T b, T x, const Lanczos& l, bool inv, bool normalised) template -inline T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised) +BOOST_GPU_ENABLED inline T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised) { return ibeta_imp(a, b, x, pol, inv, normalised, static_cast(0)); } template -T ibeta_derivative_imp(T a, T b, T x, const Policy& pol) +BOOST_GPU_ENABLED T ibeta_derivative_imp(T a, T b, T x, const Policy& pol) { - static const char* function = "ibeta_derivative<%1%>(%1%,%1%,%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "ibeta_derivative<%1%>(%1%,%1%,%1%)"; // // start with the usual error checks: // @@ -1429,7 +1434,7 @@ T ibeta_derivative_imp(T a, T b, T x, const Policy& pol) // Some forwarding functions that dis-ambiguate the third argument type: // template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type beta(RT1 a, RT2 b, const Policy&, const mpl::true_*) { BOOST_FPU_EXCEPTION_GUARD @@ -1446,7 +1451,7 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::beta_imp(static_cast(a), static_cast(b), evaluation_type(), forwarding_policy()), "boost::math::beta<%1%>(%1%,%1%)"); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type beta(RT1 a, RT2 b, RT3 x, const mpl::false_*) { return boost::math::beta(a, b, x, policies::policy<>()); @@ -1459,7 +1464,7 @@ inline typename tools::promote_args::type // and forward to the implementation functions: // template -inline typename 
tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type beta(RT1 a, RT2 b, A arg) { typedef typename policies::is_policy::type tag; @@ -1467,14 +1472,14 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type beta(RT1 a, RT2 b) { return boost::math::beta(a, b, policies::policy<>()); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type beta(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1491,7 +1496,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type betac(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1507,14 +1512,14 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::ibeta_imp(static_cast(a), static_cast(b), static_cast(x), forwarding_policy(), true, false), "boost::math::betac<%1%>(%1%,%1%,%1%)"); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type betac(RT1 a, RT2 b, RT3 x) { return boost::math::betac(a, b, x, policies::policy<>()); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ibeta(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1530,14 +1535,14 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::ibeta_imp(static_cast(a), static_cast(b), static_cast(x), forwarding_policy(), false, true), "boost::math::ibeta<%1%>(%1%,%1%,%1%)"); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ibeta(RT1 a, RT2 b, RT3 x) { return boost::math::ibeta(a, b, x, policies::policy<>()); } template -inline typename tools::promote_args::type +inline 
BOOST_GPU_ENABLED typename tools::promote_args::type ibetac(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1553,14 +1558,14 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::ibeta_imp(static_cast(a), static_cast(b), static_cast(x), forwarding_policy(), true, true), "boost::math::ibetac<%1%>(%1%,%1%,%1%)"); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ibetac(RT1 a, RT2 b, RT3 x) { return boost::math::ibetac(a, b, x, policies::policy<>()); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ibeta_derivative(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1576,7 +1581,7 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::ibeta_derivative_imp(static_cast(a), static_cast(b), static_cast(x), forwarding_policy()), "boost::math::ibeta_derivative<%1%>(%1%,%1%,%1%)"); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ibeta_derivative(RT1 a, RT2 b, RT3 x) { return boost::math::ibeta_derivative(a, b, x, policies::policy<>()); diff --git a/include/boost/math/special_functions/binomial.hpp b/include/boost/math/special_functions/binomial.hpp index 9a24fc15bb..558c9fb894 100644 --- a/include/boost/math/special_functions/binomial.hpp +++ b/include/boost/math/special_functions/binomial.hpp @@ -18,11 +18,11 @@ namespace boost{ namespace math{ template -T binomial_coefficient(unsigned n, unsigned k, const Policy& pol) +BOOST_GPU_ENABLED T binomial_coefficient(unsigned n, unsigned k, const Policy& pol) { BOOST_STATIC_ASSERT(!boost::is_integral::value); BOOST_MATH_STD_USING - static const char* function = "boost::math::binomial_coefficient<%1%>(unsigned, unsigned)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::binomial_coefficient<%1%>(unsigned, 
unsigned)"; if(k > n) return policies::raise_domain_error( function, @@ -61,13 +61,13 @@ T binomial_coefficient(unsigned n, unsigned k, const Policy& pol) // we'll promote to double: // template <> -inline float binomial_coefficient >(unsigned n, unsigned k, const policies::policy<>& pol) +inline BOOST_GPU_ENABLED float binomial_coefficient >(unsigned n, unsigned k, const policies::policy<>& pol) { return policies::checked_narrowing_cast >(binomial_coefficient(n, k, pol), "boost::math::binomial_coefficient<%1%>(unsigned,unsigned)"); } template -inline T binomial_coefficient(unsigned n, unsigned k) +inline BOOST_GPU_ENABLED T binomial_coefficient(unsigned n, unsigned k) { return binomial_coefficient(n, k, policies::policy<>()); } diff --git a/include/boost/math/special_functions/cbrt.hpp b/include/boost/math/special_functions/cbrt.hpp index c34ad39949..9c417a9847 100644 --- a/include/boost/math/special_functions/cbrt.hpp +++ b/include/boost/math/special_functions/cbrt.hpp @@ -40,7 +40,7 @@ struct largest_cbrt_int_type }; template -T cbrt_imp(T z, const Policy& pol) +BOOST_GPU_ENABLED T cbrt_imp(T z, const Policy& pol) { BOOST_MATH_STD_USING // @@ -53,7 +53,7 @@ T cbrt_imp(T z, const Policy& pol) // Expected Error Term: -1.231e-006 // Maximum Relative Change in Control Points: 5.982e-004 // - static const T P[] = { + BOOST_MATH_GPU_STATIC const T P[] = { static_cast(0.37568269008611818), static_cast(1.3304968705558024), static_cast(-1.4897101632445036), @@ -61,7 +61,7 @@ T cbrt_imp(T z, const Policy& pol) static_cast(-0.6398703759826468), static_cast(0.13584489959258635), }; - static const T correction[] = { + BOOST_MATH_GPU_STATIC const T correction[] = { static_cast(0.62996052494743658238360530363911), // 2^-2/3 static_cast(0.79370052598409973737585281963615), // 2^-1/3 static_cast(1), @@ -156,7 +156,7 @@ T cbrt_imp(T z, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type cbrt(T z, const Policy& pol) +inline 
BOOST_GPU_ENABLED typename tools::promote_args::type cbrt(T z, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -164,7 +164,7 @@ inline typename tools::promote_args::type cbrt(T z, const Policy& pol) } template -inline typename tools::promote_args::type cbrt(T z) +inline BOOST_GPU_ENABLED typename tools::promote_args::type cbrt(T z) { return cbrt(z, policies::policy<>()); } diff --git a/include/boost/math/special_functions/cos_pi.hpp b/include/boost/math/special_functions/cos_pi.hpp index 669a2c87ae..89681a9ce3 100644 --- a/include/boost/math/special_functions/cos_pi.hpp +++ b/include/boost/math/special_functions/cos_pi.hpp @@ -20,7 +20,7 @@ namespace boost{ namespace math{ namespace detail{ template -T cos_pi_imp(T x, const Policy& pol) +BOOST_GPU_ENABLED T cos_pi_imp(T x, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names // cos of pi*x: @@ -57,7 +57,7 @@ T cos_pi_imp(T x, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type cos_pi(T x, const Policy&) +inline BOOST_GPU_ENABLED typename tools::promote_args::type cos_pi(T x, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -71,7 +71,7 @@ inline typename tools::promote_args::type cos_pi(T x, const Policy&) } template -inline typename tools::promote_args::type cos_pi(T x) +inline BOOST_GPU_ENABLED typename tools::promote_args::type cos_pi(T x) { return boost::math::cos_pi(x, policies::policy<>()); } diff --git a/include/boost/math/special_functions/detail/bernoulli_details.hpp b/include/boost/math/special_functions/detail/bernoulli_details.hpp index 75fadbf34a..bc5edf2051 100644 --- a/include/boost/math/special_functions/detail/bernoulli_details.hpp +++ b/include/boost/math/special_functions/detail/bernoulli_details.hpp @@ -205,8 +205,8 @@ struct fixed_vector : private std::allocator 
allocator_traits::deallocate(alloc, m_data, m_capacity); #endif } - T& operator[](unsigned n) { BOOST_ASSERT(n < m_used); return m_data[n]; } - const T& operator[](unsigned n)const { BOOST_ASSERT(n < m_used); return m_data[n]; } + T& operator[](unsigned n) { BOOST_MATH_ASSERT(n < m_used); return m_data[n]; } + const T& operator[](unsigned n)const { BOOST_MATH_ASSERT(n < m_used); return m_data[n]; } unsigned size()const { return m_used; } unsigned size() { return m_used; } void resize(unsigned n, const T& val) diff --git a/include/boost/math/special_functions/detail/bessel_ik.hpp b/include/boost/math/special_functions/detail/bessel_ik.hpp index aac1781e10..c19956294c 100644 --- a/include/boost/math/special_functions/detail/bessel_ik.hpp +++ b/include/boost/math/special_functions/detail/bessel_ik.hpp @@ -94,8 +94,8 @@ int temme_ik(T v, T x, T* K, T* K1, const Policy& pol) // |x| <= 2, Temme series converge rapidly // |x| > 2, the larger the |x|, the slower the convergence - BOOST_ASSERT(abs(x) <= 2); - BOOST_ASSERT(abs(v) <= 0.5f); + BOOST_MATH_ASSERT(abs(x) <= 2); + BOOST_MATH_ASSERT(abs(v) <= 0.5f); T gp = boost::math::tgamma1pm1(v, pol); T gm = boost::math::tgamma1pm1(-v, pol); @@ -216,7 +216,7 @@ int CF2_ik(T v, T x, T* Kv, T* Kv1, const Policy& pol) // |x| >= |v|, CF2_ik converges rapidly // |x| -> 0, CF2_ik fails to converge - BOOST_ASSERT(abs(x) > 1); + BOOST_MATH_ASSERT(abs(x) > 1); // Steed's algorithm, see Thompson and Barnett, // Journal of Computational Physics, vol 64, 490 (1986) diff --git a/include/boost/math/special_functions/detail/bessel_j0.hpp b/include/boost/math/special_functions/detail/bessel_j0.hpp index ebcab17240..7a30f66ecd 100644 --- a/include/boost/math/special_functions/detail/bessel_j0.hpp +++ b/include/boost/math/special_functions/detail/bessel_j0.hpp @@ -148,7 +148,7 @@ T bessel_j0(T x) if (x <= 4) // x in (0, 4] { T y = x * x; - BOOST_ASSERT(sizeof(P1) == sizeof(Q1)); + BOOST_MATH_ASSERT(sizeof(P1) == sizeof(Q1)); r = 
evaluate_rational(P1, Q1, y); factor = (x + x1) * ((x - x11/256) - x12); value = factor * r; @@ -156,7 +156,7 @@ T bessel_j0(T x) else if (x <= 8.0) // x in (4, 8] { T y = 1 - (x * x)/64; - BOOST_ASSERT(sizeof(P2) == sizeof(Q2)); + BOOST_MATH_ASSERT(sizeof(P2) == sizeof(Q2)); r = evaluate_rational(P2, Q2, y); factor = (x + x2) * ((x - x21/256) - x22); value = factor * r; @@ -165,8 +165,8 @@ T bessel_j0(T x) { T y = 8 / x; T y2 = y * y; - BOOST_ASSERT(sizeof(PC) == sizeof(QC)); - BOOST_ASSERT(sizeof(PS) == sizeof(QS)); + BOOST_MATH_ASSERT(sizeof(PC) == sizeof(QC)); + BOOST_MATH_ASSERT(sizeof(PS) == sizeof(QS)); rc = evaluate_rational(PC, QC, y2); rs = evaluate_rational(PS, QS, y2); factor = constants::one_div_root_pi() / sqrt(x); diff --git a/include/boost/math/special_functions/detail/bessel_j1.hpp b/include/boost/math/special_functions/detail/bessel_j1.hpp index 91ecd2832d..a9e0baa92d 100644 --- a/include/boost/math/special_functions/detail/bessel_j1.hpp +++ b/include/boost/math/special_functions/detail/bessel_j1.hpp @@ -149,7 +149,7 @@ T bessel_j1(T x) if (w <= 4) // w in (0, 4] { T y = x * x; - BOOST_ASSERT(sizeof(P1) == sizeof(Q1)); + BOOST_MATH_ASSERT(sizeof(P1) == sizeof(Q1)); r = evaluate_rational(P1, Q1, y); factor = w * (w + x1) * ((w - x11/256) - x12); value = factor * r; @@ -157,7 +157,7 @@ T bessel_j1(T x) else if (w <= 8) // w in (4, 8] { T y = x * x; - BOOST_ASSERT(sizeof(P2) == sizeof(Q2)); + BOOST_MATH_ASSERT(sizeof(P2) == sizeof(Q2)); r = evaluate_rational(P2, Q2, y); factor = w * (w + x2) * ((w - x21/256) - x22); value = factor * r; @@ -166,8 +166,8 @@ T bessel_j1(T x) { T y = 8 / w; T y2 = y * y; - BOOST_ASSERT(sizeof(PC) == sizeof(QC)); - BOOST_ASSERT(sizeof(PS) == sizeof(QS)); + BOOST_MATH_ASSERT(sizeof(PC) == sizeof(QC)); + BOOST_MATH_ASSERT(sizeof(PS) == sizeof(QS)); rc = evaluate_rational(PC, QC, y2); rs = evaluate_rational(PS, QS, y2); factor = 1 / (sqrt(w) * constants::root_pi()); diff --git 
a/include/boost/math/special_functions/detail/bessel_jn.hpp b/include/boost/math/special_functions/detail/bessel_jn.hpp index 2413630637..b3e452d6f5 100644 --- a/include/boost/math/special_functions/detail/bessel_jn.hpp +++ b/include/boost/math/special_functions/detail/bessel_jn.hpp @@ -66,7 +66,7 @@ T bessel_jn(int n, T x, const Policy& pol) return static_cast(0); } - BOOST_ASSERT(n > 1); + BOOST_MATH_ASSERT(n > 1); T scale = 1; if (n < abs(x)) // forward recurrence { diff --git a/include/boost/math/special_functions/detail/bessel_jy.hpp b/include/boost/math/special_functions/detail/bessel_jy.hpp index b67d989b68..152d999237 100644 --- a/include/boost/math/special_functions/detail/bessel_jy.hpp +++ b/include/boost/math/special_functions/detail/bessel_jy.hpp @@ -82,7 +82,7 @@ namespace boost { namespace math { using namespace boost::math::tools; using namespace boost::math::constants; - BOOST_ASSERT(fabs(v) <= 0.5f); // precondition for using this routine + BOOST_MATH_ASSERT(fabs(v) <= 0.5f); // precondition for using this routine T gp = boost::math::tgamma1pm1(v, pol); T gm = boost::math::tgamma1pm1(-v, pol); @@ -197,7 +197,7 @@ namespace boost { namespace math { // |x| >= |v|, CF2_jy converges rapidly // |x| -> 0, CF2_jy fails to converge - BOOST_ASSERT(fabs(x) > 1); + BOOST_MATH_ASSERT(fabs(x) > 1); // modified Lentz's method, complex numbers involved, see // Lentz, Applied Optics, vol 15, 668 (1976) @@ -264,7 +264,7 @@ namespace boost { namespace math { template int bessel_jy(T v, T x, T* J, T* Y, int kind, const Policy& pol) { - BOOST_ASSERT(x >= 0); + BOOST_MATH_ASSERT(x >= 0); T u, Jv, Ju, Yv, Yv1, Yu, Yu1(0), fv, fu; T W, p, q, gamma, current, prev, next; diff --git a/include/boost/math/special_functions/detail/bessel_jy_asym.hpp b/include/boost/math/special_functions/detail/bessel_jy_asym.hpp index 4d7ac485ad..63cf10c5d0 100644 --- a/include/boost/math/special_functions/detail/bessel_jy_asym.hpp +++ 
b/include/boost/math/special_functions/detail/bessel_jy_asym.hpp @@ -133,7 +133,7 @@ inline bool asymptotic_bessel_large_x_limit(int v, const T& x) // error rates either side of the divide for v < 10000. // At double precision eps^1/8 ~= 0.01. // - BOOST_ASSERT(v >= 0); + BOOST_MATH_ASSERT(v >= 0); return (v ? v : 1) < x * 0.004f; } diff --git a/include/boost/math/special_functions/detail/bessel_jy_series.hpp b/include/boost/math/special_functions/detail/bessel_jy_series.hpp index d50bef84e8..6864d45782 100644 --- a/include/boost/math/special_functions/detail/bessel_jy_series.hpp +++ b/include/boost/math/special_functions/detail/bessel_jy_series.hpp @@ -218,8 +218,8 @@ T bessel_yn_small_z(int n, T z, T* scale, const Policy& pol) // Note that when called we assume that x < epsilon and n is a positive integer. // BOOST_MATH_STD_USING - BOOST_ASSERT(n >= 0); - BOOST_ASSERT((z < policies::get_epsilon())); + BOOST_MATH_ASSERT(n >= 0); + BOOST_MATH_ASSERT((z < policies::get_epsilon())); if(n == 0) { diff --git a/include/boost/math/special_functions/detail/bessel_kn.hpp b/include/boost/math/special_functions/detail/bessel_kn.hpp index 54c4a1cfa7..486bcd138a 100644 --- a/include/boost/math/special_functions/detail/bessel_kn.hpp +++ b/include/boost/math/special_functions/detail/bessel_kn.hpp @@ -56,7 +56,7 @@ T bessel_kn(int n, T x, const Policy& pol) prev = bessel_k0(x); current = bessel_k1(x); int k = 1; - BOOST_ASSERT(k < n); + BOOST_MATH_ASSERT(k < n); T scale = 1; do { diff --git a/include/boost/math/special_functions/detail/bessel_yn.hpp b/include/boost/math/special_functions/detail/bessel_yn.hpp index 62d7377e4f..e27f8e1201 100644 --- a/include/boost/math/special_functions/detail/bessel_yn.hpp +++ b/include/boost/math/special_functions/detail/bessel_yn.hpp @@ -77,7 +77,7 @@ T bessel_yn(int n, T x, const Policy& pol) prev = bessel_y0(x, pol); current = bessel_y1(x, pol); int k = 1; - BOOST_ASSERT(k < n); + BOOST_MATH_ASSERT(k < n); 
policies::check_series_iterations("boost::math::bessel_y_n<%1%>(%1%,%1%)", n, pol); T mult = 2 * k / x; value = mult * current - prev; diff --git a/include/boost/math/special_functions/detail/erf_inv.hpp b/include/boost/math/special_functions/detail/erf_inv.hpp index 4e48300521..3fe6966d0f 100644 --- a/include/boost/math/special_functions/detail/erf_inv.hpp +++ b/include/boost/math/special_functions/detail/erf_inv.hpp @@ -21,7 +21,7 @@ namespace detail{ // this version is for 80-bit long double's and smaller: // template -T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>*) +BOOST_GPU_ENABLED T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>*) { BOOST_MATH_STD_USING // for ADL of std names. @@ -41,8 +41,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>* // long double: Max error found: 1.017064e-20 // Maximum Deviation Found (actual error term at infinite precision) 8.030e-21 // - static const float Y = 0.0891314744949340820313f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const float Y = 0.0891314744949340820313f; + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.000508781949658280665617), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00836874819741736770379), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0334806625409744615033), @@ -52,7 +52,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>* BOOST_MATH_BIG_CONSTANT(T, 64, 0.00822687874676915743155), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00538772965071242932965) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, -0.970005043303290640362), BOOST_MATH_BIG_CONSTANT(T, 64, -1.56574558234175846809), @@ -82,8 +82,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>* // long double : Max error found: 6.084616e-20 // Maximum Deviation Found (error term) 4.811e-20 // - static const 
float Y = 2.249481201171875f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const float Y = 2.249481201171875f; + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.202433508355938759655), BOOST_MATH_BIG_CONSTANT(T, 64, 0.105264680699391713268), BOOST_MATH_BIG_CONSTANT(T, 64, 8.37050328343119927838), @@ -94,7 +94,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>* BOOST_MATH_BIG_CONSTANT(T, 64, 21.1294655448340526258), BOOST_MATH_BIG_CONSTANT(T, 64, -3.67192254707729348546) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 6.24264124854247537712), BOOST_MATH_BIG_CONSTANT(T, 64, 3.9713437953343869095), @@ -135,8 +135,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>* if(x < 3) { // Max error found: 1.089051e-20 - static const float Y = 0.807220458984375f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const float Y = 0.807220458984375f; + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.131102781679951906451), BOOST_MATH_BIG_CONSTANT(T, 64, -0.163794047193317060787), BOOST_MATH_BIG_CONSTANT(T, 64, 0.117030156341995252019), @@ -149,7 +149,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>* BOOST_MATH_BIG_CONSTANT(T, 64, 0.285225331782217055858e-7), BOOST_MATH_BIG_CONSTANT(T, 64, -0.681149956853776992068e-9) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 3.46625407242567245975), BOOST_MATH_BIG_CONSTANT(T, 64, 5.38168345707006855425), @@ -166,8 +166,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>* else if(x < 6) { // Max error found: 8.389174e-21 - static const float Y = 0.93995571136474609375f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const float Y = 0.93995571136474609375f; + BOOST_MATH_GPU_STATIC 
const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.0350353787183177984712), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00222426529213447927281), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0185573306514231072324), @@ -178,7 +178,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>* BOOST_MATH_BIG_CONSTANT(T, 64, -0.230404776911882601748e-9), BOOST_MATH_BIG_CONSTANT(T, 64, 0.266339227425782031962e-11) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 1.3653349817554063097), BOOST_MATH_BIG_CONSTANT(T, 64, 0.762059164553623404043), @@ -194,8 +194,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>* else if(x < 18) { // Max error found: 1.481312e-19 - static const float Y = 0.98362827301025390625f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const float Y = 0.98362827301025390625f; + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.0167431005076633737133), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00112951438745580278863), BOOST_MATH_BIG_CONSTANT(T, 64, 0.00105628862152492910091), @@ -206,7 +206,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>* BOOST_MATH_BIG_CONSTANT(T, 64, -0.281128735628831791805e-13), BOOST_MATH_BIG_CONSTANT(T, 64, 0.99055709973310326855e-16) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 0.591429344886417493481), BOOST_MATH_BIG_CONSTANT(T, 64, 0.138151865749083321638), @@ -222,8 +222,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>* else if(x < 44) { // Max error found: 5.697761e-20 - static const float Y = 0.99714565277099609375f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const float Y = 0.99714565277099609375f; + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.0024978212791898131227), 
BOOST_MATH_BIG_CONSTANT(T, 64, -0.779190719229053954292e-5), BOOST_MATH_BIG_CONSTANT(T, 64, 0.254723037413027451751e-4), @@ -233,7 +233,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>* BOOST_MATH_BIG_CONSTANT(T, 64, 0.145596286718675035587e-11), BOOST_MATH_BIG_CONSTANT(T, 64, -0.116765012397184275695e-17) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 0.207123112214422517181), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0169410838120975906478), @@ -249,8 +249,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>* else { // Max error found: 1.279746e-20 - static const float Y = 0.99941349029541015625f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const float Y = 0.99941349029541015625f; + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.000539042911019078575891), BOOST_MATH_BIG_CONSTANT(T, 64, -0.28398759004727721098e-6), BOOST_MATH_BIG_CONSTANT(T, 64, 0.899465114892291446442e-6), @@ -260,7 +260,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const boost::mpl::int_<64>* BOOST_MATH_BIG_CONSTANT(T, 64, 0.135880130108924861008e-14), BOOST_MATH_BIG_CONSTANT(T, 64, -0.348890393399948882918e-21) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0845746234001899436914), BOOST_MATH_BIG_CONSTANT(T, 64, 0.00282092984726264681981), @@ -372,9 +372,11 @@ struct erf_inv_initializer void force_instantiate()const{} }; static const init initializer; - static void force_instantiate() + static BOOST_GPU_ENABLED void force_instantiate() { +#ifndef __CUDA_ARCH__ initializer.force_instantiate(); +#endif } }; @@ -393,14 +395,14 @@ bool erf_inv_initializer::init::is_value_non_zero(T v) } // namespace detail template -typename tools::promote_args::type erfc_inv(T z, const Policy& pol) +BOOST_GPU_ENABLED typename 
tools::promote_args::type erfc_inv(T z, const Policy& pol) { typedef typename tools::promote_args::type result_type; // // Begin by testing for domain errors, and other special cases: // - static const char* function = "boost::math::erfc_inv<%1%>(%1%, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::erfc_inv<%1%>(%1%, %1%)"; if((z < 0) || (z > 2)) return policies::raise_domain_error(function, "Argument outside range [0,2] in inverse erfc function (got p=%1%).", z, pol); if(z == 0) @@ -457,14 +459,14 @@ typename tools::promote_args::type erfc_inv(T z, const Policy& pol) } template -typename tools::promote_args::type erf_inv(T z, const Policy& pol) +BOOST_GPU_ENABLED typename tools::promote_args::type erf_inv(T z, const Policy& pol) { typedef typename tools::promote_args::type result_type; // // Begin by testing for domain errors, and other special cases: // - static const char* function = "boost::math::erf_inv<%1%>(%1%, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::erf_inv<%1%>(%1%, %1%)"; if((z < -1) || (z > 1)) return policies::raise_domain_error(function, "Argument outside range [-1, 1] in inverse erf function (got p=%1%).", z, pol); if(z == 1) @@ -527,13 +529,13 @@ typename tools::promote_args::type erf_inv(T z, const Policy& pol) } template -inline typename tools::promote_args::type erfc_inv(T z) +BOOST_GPU_ENABLED inline typename tools::promote_args::type erfc_inv(T z) { return erfc_inv(z, policies::policy<>()); } template -inline typename tools::promote_args::type erf_inv(T z) +BOOST_GPU_ENABLED inline typename tools::promote_args::type erf_inv(T z) { return erf_inv(z, policies::policy<>()); } diff --git a/include/boost/math/special_functions/detail/fp_traits.hpp b/include/boost/math/special_functions/detail/fp_traits.hpp index c957022223..79a231858b 100644 --- a/include/boost/math/special_functions/detail/fp_traits.hpp +++ b/include/boost/math/special_functions/detail/fp_traits.hpp @@ -184,8 +184,8 @@ template<> 
struct fp_traits_non_native BOOST_STATIC_CONSTANT(uint32_t, significand = 0x007fffff); typedef uint32_t bits; - static void get_bits(float x, uint32_t& a) { std::memcpy(&a, &x, 4); } - static void set_bits(float& x, uint32_t a) { std::memcpy(&x, &a, 4); } + static BOOST_GPU_ENABLED void get_bits(float x, uint32_t& a) { std::memcpy(&a, &x, 4); } + static BOOST_GPU_ENABLED void set_bits(float& x, uint32_t a) { std::memcpy(&x, &a, 4); } }; // ieee_tag version, double (64 bits) ---------------------------------------------- @@ -204,12 +204,12 @@ template<> struct fp_traits_non_native typedef uint32_t bits; - static void get_bits(double x, uint32_t& a) + static BOOST_GPU_ENABLED void get_bits(double x, uint32_t& a) { std::memcpy(&a, reinterpret_cast(&x) + offset_, 4); } - static void set_bits(double& x, uint32_t a) + static BOOST_GPU_ENABLED void set_bits(double& x, uint32_t a) { std::memcpy(reinterpret_cast(&x) + offset_, &a, 4); } @@ -240,8 +240,8 @@ template<> struct fp_traits_non_native = (((uint64_t)0x000fffff) << 32) + ((uint64_t)0xffffffffu); typedef uint64_t bits; - static void get_bits(double x, uint64_t& a) { std::memcpy(&a, &x, 8); } - static void set_bits(double& x, uint64_t a) { std::memcpy(&x, &a, 8); } + static BOOST_GPU_ENABLED void get_bits(double x, uint64_t& a) { std::memcpy(&a, &x, 8); } + static BOOST_GPU_ENABLED void set_bits(double& x, uint64_t a) { std::memcpy(&x, &a, 8); } }; #endif @@ -264,12 +264,12 @@ template<> struct fp_traits_non_native typedef uint32_t bits; - static void get_bits(long double x, uint32_t& a) + static BOOST_GPU_ENABLED void get_bits(long double x, uint32_t& a) { std::memcpy(&a, reinterpret_cast(&x) + offset_, 4); } - static void set_bits(long double& x, uint32_t a) + static BOOST_GPU_ENABLED void set_bits(long double& x, uint32_t a) { std::memcpy(reinterpret_cast(&x) + offset_, &a, 4); } @@ -300,8 +300,8 @@ template<> struct fp_traits_non_native = ((uint64_t)0x000fffff << 32) + (uint64_t)0xffffffffu; typedef uint64_t bits; 
- static void get_bits(long double x, uint64_t& a) { std::memcpy(&a, &x, 8); } - static void set_bits(long double& x, uint64_t a) { std::memcpy(&x, &a, 8); } + static BOOST_GPU_ENABLED void get_bits(long double x, uint64_t& a) { std::memcpy(&a, &x, 8); } + static BOOST_GPU_ENABLED void set_bits(long double& x, uint64_t a) { std::memcpy(&x, &a, 8); } }; #endif @@ -327,12 +327,12 @@ struct fp_traits_non_native typedef uint32_t bits; - static void get_bits(long double x, uint32_t& a) + static BOOST_GPU_ENABLED void get_bits(long double x, uint32_t& a) { std::memcpy(&a, reinterpret_cast(&x) + 6, 4); } - static void set_bits(long double& x, uint32_t a) + static BOOST_GPU_ENABLED void set_bits(long double& x, uint32_t a) { std::memcpy(reinterpret_cast(&x) + 6, &a, 4); } @@ -378,12 +378,12 @@ struct fp_traits_non_native typedef uint32_t bits; - static void get_bits(long double x, uint32_t& a) + static BOOST_GPU_ENABLED void get_bits(long double x, uint32_t& a) { std::memcpy(&a, reinterpret_cast(&x) + offset_, 4); } - static void set_bits(long double& x, uint32_t a) + static BOOST_GPU_ENABLED void set_bits(long double& x, uint32_t a) { std::memcpy(reinterpret_cast(&x) + offset_, &a, 4); } @@ -425,14 +425,14 @@ struct fp_traits_non_native typedef uint32_t bits; - static void get_bits(long double x, uint32_t& a) + static BOOST_GPU_ENABLED void get_bits(long double x, uint32_t& a) { std::memcpy(&a, &x, 2); std::memcpy(reinterpret_cast(&a) + 2, reinterpret_cast(&x) + 4, 2); } - static void set_bits(long double& x, uint32_t a) + static BOOST_GPU_ENABLED void set_bits(long double& x, uint32_t a) { std::memcpy(&x, &a, 2); std::memcpy(reinterpret_cast(&x) + 4, @@ -459,12 +459,12 @@ struct fp_traits_non_native typedef uint32_t bits; - static void get_bits(long double x, uint32_t& a) + static BOOST_GPU_ENABLED void get_bits(long double x, uint32_t& a) { std::memcpy(&a, reinterpret_cast(&x) + offset_, 4); } - static void set_bits(long double& x, uint32_t a) + static BOOST_GPU_ENABLED 
void set_bits(long double& x, uint32_t a) { std::memcpy(reinterpret_cast(&x) + offset_, &a, 4); } diff --git a/include/boost/math/special_functions/detail/ibeta_inverse.hpp b/include/boost/math/special_functions/detail/ibeta_inverse.hpp index a9fe8cd49c..3e136cddef 100644 --- a/include/boost/math/special_functions/detail/ibeta_inverse.hpp +++ b/include/boost/math/special_functions/detail/ibeta_inverse.hpp @@ -119,9 +119,9 @@ T temme_method_1_ibeta_inverse(T a, T b, T z, const Policy& pol) else x = (1 + eta * sqrt((1 + c) / eta_2)) / 2; - BOOST_ASSERT(x >= 0); - BOOST_ASSERT(x <= 1); - BOOST_ASSERT(eta * (x - 0.5) >= 0); + BOOST_MATH_ASSERT(x >= 0); + BOOST_MATH_ASSERT(x <= 1); + BOOST_MATH_ASSERT(eta * (x - 0.5) >= 0); #ifdef BOOST_INSTRUMENT std::cout << "Estimating x with Temme method 1: " << x << std::endl; #endif @@ -853,8 +853,8 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) // We don't really want these asserts here, but they are useful for sanity // checking that we have the limits right, uncomment if you suspect bugs *only*. 
// - //BOOST_ASSERT(x != upper); - //BOOST_ASSERT((x != lower) || (x == boost::math::tools::min_value()) || (x == boost::math::tools::epsilon())); + //BOOST_MATH_ASSERT(x != upper); + //BOOST_MATH_ASSERT((x != lower) || (x == boost::math::tools::min_value()) || (x == boost::math::tools::epsilon())); // // Tidy up, if we "lower" was too high then zero is the best answer we have: // diff --git a/include/boost/math/special_functions/detail/igamma_large.hpp b/include/boost/math/special_functions/detail/igamma_large.hpp index eb3d4ba93e..105c941564 100644 --- a/include/boost/math/special_functions/detail/igamma_large.hpp +++ b/include/boost/math/special_functions/detail/igamma_large.hpp @@ -55,10 +55,10 @@ namespace boost{ namespace math{ namespace detail{ // when T is unsuitable to be passed to these routines: // template -inline T igamma_temme_large(T, T, const Policy& /* pol */, mpl::int_<0> const *) +inline BOOST_GPU_ENABLED T igamma_temme_large(T, T, const Policy& /* pol */, mpl::int_<0> const *) { // stub function, should never actually be called - BOOST_ASSERT(0); + BOOST_MATH_ASSERT(0); return 0; } // @@ -66,7 +66,7 @@ inline T igamma_temme_large(T, T, const Policy& /* pol */, mpl::int_<0> const *) // (80-bit long double, or 10^-20). 
// template -T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) +BOOST_GPU_ENABLED T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) { BOOST_MATH_STD_USING // ADL of std functions T sigma = (x - a) / a; @@ -78,7 +78,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) T workspace[13]; - static const T C0[] = { + BOOST_MATH_GPU_STATIC const T C0[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.333333333333333333333), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0833333333333333333333), BOOST_MATH_BIG_CONSTANT(T, 64, -0.0148148148148148148148), @@ -101,7 +101,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) }; workspace[0] = tools::evaluate_polynomial(C0, z); - static const T C1[] = { + BOOST_MATH_GPU_STATIC const T C1[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.00185185185185185185185), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00347222222222222222222), BOOST_MATH_BIG_CONSTANT(T, 64, 0.00264550264550264550265), @@ -122,7 +122,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) }; workspace[1] = tools::evaluate_polynomial(C1, z); - static const T C2[] = { + BOOST_MATH_GPU_STATIC const T C2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 0.00413359788359788359788), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00268132716049382716049), BOOST_MATH_BIG_CONSTANT(T, 64, 0.000771604938271604938272), @@ -141,7 +141,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) }; workspace[2] = tools::evaluate_polynomial(C2, z); - static const T C3[] = { + BOOST_MATH_GPU_STATIC const T C3[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 0.000649434156378600823045), BOOST_MATH_BIG_CONSTANT(T, 64, 0.000229472093621399176955), BOOST_MATH_BIG_CONSTANT(T, 64, -0.000469189494395255712128), @@ -158,7 +158,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) }; workspace[3] = tools::evaluate_polynomial(C3, z); - static const T C4[] = { + BOOST_MATH_GPU_STATIC const T C4[] = { 
BOOST_MATH_BIG_CONSTANT(T, 64, -0.000861888290916711698605), BOOST_MATH_BIG_CONSTANT(T, 64, 0.000784039221720066627474), BOOST_MATH_BIG_CONSTANT(T, 64, -0.000299072480303190179733), @@ -173,7 +173,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) }; workspace[4] = tools::evaluate_polynomial(C4, z); - static const T C5[] = { + BOOST_MATH_GPU_STATIC const T C5[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.000336798553366358150309), BOOST_MATH_BIG_CONSTANT(T, 64, -0.697281375836585777429e-4), BOOST_MATH_BIG_CONSTANT(T, 64, 0.000277275324495939207873), @@ -186,7 +186,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) }; workspace[5] = tools::evaluate_polynomial(C5, z); - static const T C6[] = { + BOOST_MATH_GPU_STATIC const T C6[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 0.000531307936463992223166), BOOST_MATH_BIG_CONSTANT(T, 64, -0.000592166437353693882865), BOOST_MATH_BIG_CONSTANT(T, 64, 0.000270878209671804482771), @@ -201,7 +201,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) }; workspace[6] = tools::evaluate_polynomial(C6, z); - static const T C7[] = { + BOOST_MATH_GPU_STATIC const T C7[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 0.000344367606892377671254), BOOST_MATH_BIG_CONSTANT(T, 64, 0.517179090826059219337e-4), BOOST_MATH_BIG_CONSTANT(T, 64, -0.000334931610811422363117), @@ -214,7 +214,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) }; workspace[7] = tools::evaluate_polynomial(C7, z); - static const T C8[] = { + BOOST_MATH_GPU_STATIC const T C8[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.000652623918595309418922), BOOST_MATH_BIG_CONSTANT(T, 64, 0.000839498720672087279993), BOOST_MATH_BIG_CONSTANT(T, 64, -0.000438297098541721005061), @@ -225,7 +225,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) }; workspace[8] = tools::evaluate_polynomial(C8, z); - static const T C9[] = { + BOOST_MATH_GPU_STATIC const T C9[] = { BOOST_MATH_BIG_CONSTANT(T, 
64, -0.000596761290192746250124), BOOST_MATH_BIG_CONSTANT(T, 64, -0.720489541602001055909e-4), BOOST_MATH_BIG_CONSTANT(T, 64, 0.000678230883766732836162), @@ -234,14 +234,14 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) }; workspace[9] = tools::evaluate_polynomial(C9, z); - static const T C10[] = { + BOOST_MATH_GPU_STATIC const T C10[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 0.00133244544948006563713), BOOST_MATH_BIG_CONSTANT(T, 64, -0.0019144384985654775265), BOOST_MATH_BIG_CONSTANT(T, 64, 0.00110893691345966373396), }; workspace[10] = tools::evaluate_polynomial(C10, z); - static const T C11[] = { + BOOST_MATH_GPU_STATIC const T C11[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 0.00157972766073083495909), BOOST_MATH_BIG_CONSTANT(T, 64, 0.000162516262783915816899), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00206334210355432762645), @@ -250,7 +250,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) }; workspace[11] = tools::evaluate_polynomial(C11, z); - static const T C12[] = { + BOOST_MATH_GPU_STATIC const T C12[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.00407251211951401664727), BOOST_MATH_BIG_CONSTANT(T, 64, 0.00640336283380806979482), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00404101610816766177474), @@ -271,7 +271,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<64> const *) // (IEEE double precision or 10^-17). 
// template -T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<53> const *) +BOOST_GPU_ENABLED T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<53> const *) { BOOST_MATH_STD_USING // ADL of std functions T sigma = (x - a) / a; @@ -283,7 +283,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<53> const *) T workspace[10]; - static const T C0[] = { + BOOST_MATH_GPU_STATIC const T C0[] = { static_cast(-0.33333333333333333L), static_cast(0.083333333333333333L), static_cast(-0.014814814814814815L), @@ -302,7 +302,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<53> const *) }; workspace[0] = tools::evaluate_polynomial(C0, z); - static const T C1[] = { + BOOST_MATH_GPU_STATIC const T C1[] = { static_cast(-0.0018518518518518519L), static_cast(-0.0034722222222222222L), static_cast(0.0026455026455026455L), @@ -319,7 +319,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<53> const *) }; workspace[1] = tools::evaluate_polynomial(C1, z); - static const T C2[] = { + BOOST_MATH_GPU_STATIC const T C2[] = { static_cast(0.0041335978835978836L), static_cast(-0.0026813271604938272L), static_cast(0.00077160493827160494L), @@ -334,7 +334,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<53> const *) }; workspace[2] = tools::evaluate_polynomial(C2, z); - static const T C3[] = { + BOOST_MATH_GPU_STATIC const T C3[] = { static_cast(0.00064943415637860082L), static_cast(0.00022947209362139918L), static_cast(-0.00046918949439525571L), @@ -347,7 +347,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<53> const *) }; workspace[3] = tools::evaluate_polynomial(C3, z); - static const T C4[] = { + BOOST_MATH_GPU_STATIC const T C4[] = { static_cast(-0.0008618882909167117L), static_cast(0.00078403922172006663L), static_cast(-0.00029907248030319018L), @@ -358,7 +358,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<53> const *) }; workspace[4] = tools::evaluate_polynomial(C4, z); - static 
const T C5[] = { + BOOST_MATH_GPU_STATIC const T C5[] = { static_cast(-0.00033679855336635815L), static_cast(-0.69728137583658578e-4L), static_cast(0.00027727532449593921L), @@ -371,7 +371,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<53> const *) }; workspace[5] = tools::evaluate_polynomial(C5, z); - static const T C6[] = { + BOOST_MATH_GPU_STATIC const T C6[] = { static_cast(0.00053130793646399222L), static_cast(-0.00059216643735369388L), static_cast(0.00027087820967180448L), @@ -382,7 +382,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<53> const *) }; workspace[6] = tools::evaluate_polynomial(C6, z); - static const T C7[] = { + BOOST_MATH_GPU_STATIC const T C7[] = { static_cast(0.00034436760689237767L), static_cast(0.51717909082605922e-4L), static_cast(-0.00033493161081142236L), @@ -391,7 +391,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<53> const *) }; workspace[7] = tools::evaluate_polynomial(C7, z); - static const T C8[] = { + BOOST_MATH_GPU_STATIC const T C8[] = { static_cast(-0.00065262391859530942L), static_cast(0.00083949872067208728L), static_cast(-0.00043829709854172101L), @@ -413,7 +413,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<53> const *) // (IEEE float precision, or 10^-8) // template -T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<24> const *) +BOOST_GPU_ENABLED T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<24> const *) { BOOST_MATH_STD_USING // ADL of std functions T sigma = (x - a) / a; @@ -425,7 +425,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<24> const *) T workspace[3]; - static const T C0[] = { + BOOST_MATH_GPU_STATIC const T C0[] = { static_cast(-0.333333333L), static_cast(0.0833333333L), static_cast(-0.0148148148L), @@ -436,7 +436,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<24> const *) }; workspace[0] = tools::evaluate_polynomial(C0, z); - static const T C1[] = { + BOOST_MATH_GPU_STATIC const 
T C1[] = { static_cast(-0.00185185185L), static_cast(-0.00347222222L), static_cast(0.00264550265L), @@ -445,7 +445,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<24> const *) }; workspace[1] = tools::evaluate_polynomial(C1, z); - static const T C2[] = { + BOOST_MATH_GPU_STATIC const T C2[] = { static_cast(0.00413359788L), static_cast(-0.00268132716L), static_cast(0.000771604938L), @@ -469,7 +469,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<24> const *) // require many more terms in the polynomials. // template -T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) +BOOST_GPU_ENABLED T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) { BOOST_MATH_STD_USING // ADL of std functions T sigma = (x - a) / a; @@ -481,7 +481,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) T workspace[14]; - static const T C0[] = { + BOOST_MATH_GPU_STATIC const T C0[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -0.333333333333333333333333333333333333), BOOST_MATH_BIG_CONSTANT(T, 113, 0.0833333333333333333333333333333333333), BOOST_MATH_BIG_CONSTANT(T, 113, -0.0148148148148148148148148148148148148), @@ -516,7 +516,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) }; workspace[0] = tools::evaluate_polynomial(C0, z); - static const T C1[] = { + BOOST_MATH_GPU_STATIC const T C1[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -0.00185185185185185185185185185185185185), BOOST_MATH_BIG_CONSTANT(T, 113, -0.00347222222222222222222222222222222222), BOOST_MATH_BIG_CONSTANT(T, 113, 0.0026455026455026455026455026455026455), @@ -549,7 +549,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) }; workspace[1] = tools::evaluate_polynomial(C1, z); - static const T C2[] = { + BOOST_MATH_GPU_STATIC const T C2[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 0.0041335978835978835978835978835978836), BOOST_MATH_BIG_CONSTANT(T, 113, -0.00268132716049382716049382716049382716), 
BOOST_MATH_BIG_CONSTANT(T, 113, 0.000771604938271604938271604938271604938), @@ -580,7 +580,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) }; workspace[2] = tools::evaluate_polynomial(C2, z); - static const T C3[] = { + BOOST_MATH_GPU_STATIC const T C3[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 0.000649434156378600823045267489711934156), BOOST_MATH_BIG_CONSTANT(T, 113, 0.000229472093621399176954732510288065844), BOOST_MATH_BIG_CONSTANT(T, 113, -0.000469189494395255712128140111679206329), @@ -609,7 +609,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) }; workspace[3] = tools::evaluate_polynomial(C3, z); - static const T C4[] = { + BOOST_MATH_GPU_STATIC const T C4[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -0.000861888290916711698604702719929057378), BOOST_MATH_BIG_CONSTANT(T, 113, 0.00078403922172006662747403488144228885), BOOST_MATH_BIG_CONSTANT(T, 113, -0.000299072480303190179733389609932819809), @@ -636,7 +636,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) }; workspace[4] = tools::evaluate_polynomial(C4, z); - static const T C5[] = { + BOOST_MATH_GPU_STATIC const T C5[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -0.000336798553366358150308767592718210002), BOOST_MATH_BIG_CONSTANT(T, 113, -0.697281375836585777429398828575783308e-4), BOOST_MATH_BIG_CONSTANT(T, 113, 0.00027727532449593920787336425196507501), @@ -657,7 +657,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) }; workspace[5] = tools::evaluate_polynomial(C5, z); - static const T C6[] = { + BOOST_MATH_GPU_STATIC const T C6[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 0.00053130793646399222316574854297762391), BOOST_MATH_BIG_CONSTANT(T, 113, -0.000592166437353693882864836225604401187), BOOST_MATH_BIG_CONSTANT(T, 113, 0.000270878209671804482771279183488328692), @@ -676,7 +676,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) }; workspace[6] = tools::evaluate_polynomial(C6, z); - 
static const T C7[] = { + BOOST_MATH_GPU_STATIC const T C7[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 0.000344367606892377671254279625108523655), BOOST_MATH_BIG_CONSTANT(T, 113, 0.517179090826059219337057843002058823e-4), BOOST_MATH_BIG_CONSTANT(T, 113, -0.000334931610811422363116635090580012327), @@ -693,7 +693,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) }; workspace[7] = tools::evaluate_polynomial(C7, z); - static const T C8[] = { + BOOST_MATH_GPU_STATIC const T C8[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -0.000652623918595309418922034919726622692), BOOST_MATH_BIG_CONSTANT(T, 113, 0.000839498720672087279993357516764983445), BOOST_MATH_BIG_CONSTANT(T, 113, -0.000438297098541721005061087953050560377), @@ -708,7 +708,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) }; workspace[8] = tools::evaluate_polynomial(C8, z); - static const T C9[] = { + BOOST_MATH_GPU_STATIC const T C9[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -0.000596761290192746250124390067179459605), BOOST_MATH_BIG_CONSTANT(T, 113, -0.720489541602001055908571930225015052e-4), BOOST_MATH_BIG_CONSTANT(T, 113, 0.000678230883766732836161951166000673426), @@ -721,7 +721,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) }; workspace[9] = tools::evaluate_polynomial(C9, z); - static const T C10[] = { + BOOST_MATH_GPU_STATIC const T C10[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 0.00133244544948006563712694993432717968), BOOST_MATH_BIG_CONSTANT(T, 113, -0.00191443849856547752650089885832852254), BOOST_MATH_BIG_CONSTANT(T, 113, 0.0011089369134596637339607446329267522), @@ -732,7 +732,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) }; workspace[10] = tools::evaluate_polynomial(C10, z); - static const T C11[] = { + BOOST_MATH_GPU_STATIC const T C11[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 0.00157972766073083495908785631307733022), BOOST_MATH_BIG_CONSTANT(T, 113, 0.000162516262783915816898635123980270998), 
BOOST_MATH_BIG_CONSTANT(T, 113, -0.00206334210355432762645284467690276817), @@ -741,7 +741,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, mpl::int_<113> const *) }; workspace[11] = tools::evaluate_polynomial(C11, z); - static const T C12[] = { + BOOST_MATH_GPU_STATIC const T C12[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -0.00407251211951401664727281097914544601), BOOST_MATH_BIG_CONSTANT(T, 113, 0.00640336283380806979482363809026579583), BOOST_MATH_BIG_CONSTANT(T, 113, -0.00404101610816766177473974858518094879), diff --git a/include/boost/math/special_functions/detail/lgamma_small.hpp b/include/boost/math/special_functions/detail/lgamma_small.hpp index e65f8b7e98..ec1eb47176 100644 --- a/include/boost/math/special_functions/detail/lgamma_small.hpp +++ b/include/boost/math/special_functions/detail/lgamma_small.hpp @@ -18,15 +18,15 @@ namespace boost{ namespace math{ namespace detail{ // These need forward declaring to keep GCC happy: // template -T gamma_imp(T z, const Policy& pol, const Lanczos& l); +BOOST_GPU_ENABLED T gamma_imp(T z, const Policy& pol, const Lanczos& l); template -T gamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos& l); +BOOST_GPU_ENABLED T gamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos& l); // // lgamma for small arguments: // template -T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<64>&, const Policy& /* l */, const Lanczos&) +BOOST_GPU_ENABLED T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<64>&, const Policy& /* l */, const Lanczos&) { // This version uses rational approximations for small // values of z accurate enough for 64-bit mantissas @@ -77,7 +77,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<64>&, const Policy& /* l * // At long double: Max error found: 1.987e-21 // Maximum Deviation Found (approximation error): 5.900e-24 // - static const T P[] = { + BOOST_MATH_GPU_STATIC const T P[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.180355685678449379109e-1)), 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.25126649619989678683e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.494103151567532234274e-1)), @@ -86,7 +86,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<64>&, const Policy& /* l * static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.541009869215204396339e-3)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.324588649825948492091e-4)) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.1e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.196202987197795200688e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.148019669424231326694e1)), @@ -97,7 +97,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<64>&, const Policy& /* l * static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.223352763208617092964e-6)) }; - static const float Y = 0.158963680267333984375e0f; + BOOST_MATH_GPU_STATIC const float Y = 0.158963680267333984375e0f; T r = zm2 * (z + 1); T R = tools::evaluate_polynomial(P, zm2); @@ -142,9 +142,9 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<64>&, const Policy& /* l * // Expected Error Term: 3.139e-021 // - static const float Y = 0.52815341949462890625f; + BOOST_MATH_GPU_STATIC const float Y = 0.52815341949462890625f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const T P[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.490622454069039543534e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.969117530159521214579e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.414983358359495381969e0)), @@ -153,7 +153,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<64>&, const Policy& /* l * static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.240149820648571559892e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.100346687696279557415e-2)) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.1e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.302349829846463038743e1)), 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.348739585360723852576e1)), @@ -187,9 +187,9 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<64>&, const Policy& /* l * // Maximum Deviation Found: 2.151e-021 // Expected Error Term: 2.150e-021 // - static const float Y = 0.452017307281494140625f; + BOOST_MATH_GPU_STATIC const float Y = 0.452017307281494140625f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const T P[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.292329721830270012337e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.144216267757192309184e0)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.142440390738631274135e0)), @@ -197,7 +197,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<64>&, const Policy& /* l * static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.850535976868336437746e-2)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.431171342679297331241e-3)) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.1e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.150169356054485044494e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.846973248876495016101e0)), @@ -215,7 +215,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<64>&, const Policy& /* l * return result; } template -T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<113>&, const Policy& /* l */, const Lanczos&) +BOOST_GPU_ENABLED T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<113>&, const Policy& /* l */, const Lanczos&) { // // This version uses rational approximations for small @@ -264,7 +264,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<113>&, const Policy& /* l // // Maximum Deviation Found (approximation error) 3.73e-37 - static const T P[] = { + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -0.018035568567844937910504030027467476655), BOOST_MATH_BIG_CONSTANT(T, 113, 0.013841458273109517271750705401202404195), BOOST_MATH_BIG_CONSTANT(T, 113, 
0.062031842739486600078866923383017722399), @@ -277,7 +277,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<113>&, const Policy& /* l BOOST_MATH_BIG_CONSTANT(T, 113, -0.49999811718089980992888533630523892389e-6), BOOST_MATH_BIG_CONSTANT(T, 113, -0.70529798686542184668416911331718963364e-8) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.0), BOOST_MATH_BIG_CONSTANT(T, 113, 2.5877485070422317542808137697939233685), BOOST_MATH_BIG_CONSTANT(T, 113, 2.8797959228352591788629602533153837126), @@ -296,7 +296,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<113>&, const Policy& /* l T R = tools::evaluate_polynomial(P, zm2); R /= tools::evaluate_polynomial(Q, zm2); - static const float Y = 0.158963680267333984375F; + BOOST_MATH_GPU_STATIC const float Y = 0.158963680267333984375F; T r = zm2 * (z + 1); @@ -340,9 +340,9 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<113>&, const Policy& /* l // Expected Error Term (theoretical error) 1.343e-36 // Max error found at 128-bit long double precision 1.007e-35 // - static const float Y = 0.54076099395751953125f; + BOOST_MATH_GPU_STATIC const float Y = 0.54076099395751953125f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 0.036454670944013329356512090082402429697), BOOST_MATH_BIG_CONSTANT(T, 113, -0.066235835556476033710068679907798799959), BOOST_MATH_BIG_CONSTANT(T, 113, -0.67492399795577182387312206593595565371), @@ -356,7 +356,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<113>&, const Policy& /* l BOOST_MATH_BIG_CONSTANT(T, 113, -0.10164985672213178500790406939467614498e-6), BOOST_MATH_BIG_CONSTANT(T, 113, 0.13680157145361387405588201461036338274e-8) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.0), BOOST_MATH_BIG_CONSTANT(T, 113, 4.9106336261005990534095838574132225599), BOOST_MATH_BIG_CONSTANT(T, 113, 
10.258804800866438510889341082793078432), @@ -393,9 +393,9 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<113>&, const Policy& /* l // Maximum Deviation Found (approximation error) 1.538e-37 // Expected Error Term (theoretical error) 2.350e-38 // - static const float Y = 0.483787059783935546875f; + BOOST_MATH_GPU_STATIC const float Y = 0.483787059783935546875f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -0.017977422421608624353488126610933005432), BOOST_MATH_BIG_CONSTANT(T, 113, 0.18484528905298309555089509029244135703), BOOST_MATH_BIG_CONSTANT(T, 113, -0.40401251514859546989565001431430884082), @@ -407,7 +407,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<113>&, const Policy& /* l BOOST_MATH_BIG_CONSTANT(T, 113, -0.57058739515423112045108068834668269608e-4), BOOST_MATH_BIG_CONSTANT(T, 113, 0.8207548771933585614380644961342925976e-6) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.0), BOOST_MATH_BIG_CONSTANT(T, 113, -2.9629552288944259229543137757200262073), BOOST_MATH_BIG_CONSTANT(T, 113, 3.7118380799042118987185957298964772755), @@ -434,9 +434,9 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<113>&, const Policy& /* l // Maximum Deviation Found (approximation error) 8.588e-36 // Expected Error Term (theoretical error) 1.458e-36 // - static const float Y = 0.443811893463134765625f; + BOOST_MATH_GPU_STATIC const float Y = 0.443811893463134765625f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -0.021027558364667626231512090082402429494), BOOST_MATH_BIG_CONSTANT(T, 113, 0.15128811104498736604523586803722368377), BOOST_MATH_BIG_CONSTANT(T, 113, -0.26249631480066246699388544451126410278), @@ -448,7 +448,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<113>&, const Policy& /* l BOOST_MATH_BIG_CONSTANT(T, 113, -0.11088589183158123733132268042570710338e-4), 
BOOST_MATH_BIG_CONSTANT(T, 113, 0.13240510580220763969511741896361984162e-6) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.0), BOOST_MATH_BIG_CONSTANT(T, 113, -2.4240003754444040525462170802796471996), BOOST_MATH_BIG_CONSTANT(T, 113, 2.4868383476933178722203278602342786002), @@ -472,7 +472,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<113>&, const Policy& /* l return result; } template -T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<0>&, const Policy& pol, const Lanczos&) +BOOST_GPU_ENABLED T lgamma_small_imp(T z, T zm1, T zm2, const mpl::int_<0>&, const Policy& pol, const Lanczos&) { // // No rational approximations are available because either diff --git a/include/boost/math/special_functions/detail/polygamma.hpp b/include/boost/math/special_functions/detail/polygamma.hpp index c0e4932907..0847d2decb 100644 --- a/include/boost/math/special_functions/detail/polygamma.hpp +++ b/include/boost/math/special_functions/detail/polygamma.hpp @@ -437,8 +437,8 @@ namespace boost { namespace math { namespace detail{ for(int column = 0; column <= max_columns; ++column) { int cos_order = 2 * column + offset; // order of the cosine term in entry "column" - BOOST_ASSERT(column < (int)table[i].size()); - BOOST_ASSERT((cos_order + 1) / 2 < (int)table[i + 1].size()); + BOOST_MATH_ASSERT(column < (int)table[i].size()); + BOOST_MATH_ASSERT((cos_order + 1) / 2 < (int)table[i + 1].size()); table[i + 1][(cos_order + 1) / 2] += ((cos_order - sin_order) * table[i][column]) / (sin_order - 1); if(cos_order) table[i + 1][(cos_order - 1) / 2] += (-cos_order * table[i][column]) / (sin_order - 1); diff --git a/include/boost/math/special_functions/detail/round_fwd.hpp b/include/boost/math/special_functions/detail/round_fwd.hpp index 8c45a7d75a..cf58793f92 100644 --- a/include/boost/math/special_functions/detail/round_fwd.hpp +++ b/include/boost/math/special_functions/detail/round_fwd.hpp @@ -21,58 +21,58 @@ namespace 
boost { template - typename tools::promote_args::type trunc(const T& v, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type trunc(const T& v, const Policy& pol); template - typename tools::promote_args::type trunc(const T& v); + BOOST_GPU_ENABLED typename tools::promote_args::type trunc(const T& v); template - int itrunc(const T& v, const Policy& pol); + BOOST_GPU_ENABLED int itrunc(const T& v, const Policy& pol); template - int itrunc(const T& v); + BOOST_GPU_ENABLED int itrunc(const T& v); template - long ltrunc(const T& v, const Policy& pol); + BOOST_GPU_ENABLED long ltrunc(const T& v, const Policy& pol); template - long ltrunc(const T& v); + BOOST_GPU_ENABLED long ltrunc(const T& v); #ifdef BOOST_HAS_LONG_LONG template - boost::long_long_type lltrunc(const T& v, const Policy& pol); + BOOST_GPU_ENABLED boost::long_long_type lltrunc(const T& v, const Policy& pol); template - boost::long_long_type lltrunc(const T& v); + BOOST_GPU_ENABLED boost::long_long_type lltrunc(const T& v); #endif template - typename tools::promote_args::type round(const T& v, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type round(const T& v, const Policy& pol); template - typename tools::promote_args::type round(const T& v); + BOOST_GPU_ENABLED typename tools::promote_args::type round(const T& v); template - int iround(const T& v, const Policy& pol); + BOOST_GPU_ENABLED int iround(const T& v, const Policy& pol); template - int iround(const T& v); + BOOST_GPU_ENABLED int iround(const T& v); template - long lround(const T& v, const Policy& pol); + BOOST_GPU_ENABLED long lround(const T& v, const Policy& pol); template - long lround(const T& v); + BOOST_GPU_ENABLED long lround(const T& v); #ifdef BOOST_HAS_LONG_LONG template - boost::long_long_type llround(const T& v, const Policy& pol); + BOOST_GPU_ENABLED boost::long_long_type llround(const T& v, const Policy& pol); template - boost::long_long_type llround(const T& v); + BOOST_GPU_ENABLED 
boost::long_long_type llround(const T& v); #endif template - T modf(const T& v, T* ipart, const Policy& pol); + BOOST_GPU_ENABLED T modf(const T& v, T* ipart, const Policy& pol); template - T modf(const T& v, T* ipart); + BOOST_GPU_ENABLED T modf(const T& v, T* ipart); template - T modf(const T& v, int* ipart, const Policy& pol); + BOOST_GPU_ENABLED T modf(const T& v, int* ipart, const Policy& pol); template - T modf(const T& v, int* ipart); + BOOST_GPU_ENABLED T modf(const T& v, int* ipart); template - T modf(const T& v, long* ipart, const Policy& pol); + BOOST_GPU_ENABLED T modf(const T& v, long* ipart, const Policy& pol); template - T modf(const T& v, long* ipart); + BOOST_GPU_ENABLED T modf(const T& v, long* ipart); #ifdef BOOST_HAS_LONG_LONG template - T modf(const T& v, boost::long_long_type* ipart, const Policy& pol); + BOOST_GPU_ENABLED T modf(const T& v, boost::long_long_type* ipart, const Policy& pol); template - T modf(const T& v, boost::long_long_type* ipart); + BOOST_GPU_ENABLED T modf(const T& v, boost::long_long_type* ipart); #endif } diff --git a/include/boost/math/special_functions/detail/t_distribution_inv.hpp b/include/boost/math/special_functions/detail/t_distribution_inv.hpp index ab5a8fbca6..32740609d6 100644 --- a/include/boost/math/special_functions/detail/t_distribution_inv.hpp +++ b/include/boost/math/special_functions/detail/t_distribution_inv.hpp @@ -27,7 +27,7 @@ template T inverse_students_t_hill(T ndf, T u, const Policy& pol) { BOOST_MATH_STD_USING - BOOST_ASSERT(u <= 0.5); + BOOST_MATH_ASSERT(u <= 0.5); T a, b, c, d, q, x, y; diff --git a/include/boost/math/special_functions/detail/unchecked_factorial.hpp b/include/boost/math/special_functions/detail/unchecked_factorial.hpp index 17366742c4..1d5c2ab653 100644 --- a/include/boost/math/special_functions/detail/unchecked_factorial.hpp +++ b/include/boost/math/special_functions/detail/unchecked_factorial.hpp @@ -35,6 +35,8 @@ namespace boost { namespace math template struct 
max_factorial; +#ifndef __CUDA_ARCH__ + // Definitions: template <> inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION float unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(float)) @@ -42,7 +44,7 @@ inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION float unchecked_factorial(unsi #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES constexpr std::array factorials = { { #else - static const boost::array factorials = {{ + BOOST_MATH_GPU_STATIC const boost::array factorials = {{ #endif 1.0F, 1.0F, @@ -84,6 +86,52 @@ inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION float unchecked_factorial(unsi return factorials[i]; } +#else +template <> +inline BOOST_GPU_ENABLED float unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(float)) +{ + BOOST_MATH_GPU_STATIC const float factorials[] = { + 1.0F, + 1.0F, + 2.0F, + 6.0F, + 24.0F, + 120.0F, + 720.0F, + 5040.0F, + 40320.0F, + 362880.0F, + 3628800.0F, + 39916800.0F, + 479001600.0F, + 6227020800.0F, + 87178291200.0F, + 1307674368000.0F, + 20922789888000.0F, + 355687428096000.0F, + 6402373705728000.0F, + 121645100408832000.0F, + 0.243290200817664e19F, + 0.5109094217170944e20F, + 0.112400072777760768e22F, + 0.2585201673888497664e23F, + 0.62044840173323943936e24F, + 0.15511210043330985984e26F, + 0.403291461126605635584e27F, + 0.10888869450418352160768e29F, + 0.304888344611713860501504e30F, + 0.8841761993739701954543616e31F, + 0.26525285981219105863630848e33F, + 0.822283865417792281772556288e34F, + 0.26313083693369353016721801216e36F, + 0.868331761881188649551819440128e37F, + 0.29523279903960414084761860964352e39F, + }; + + return factorials[i]; +} +#endif + template <> struct max_factorial { @@ -91,13 +139,15 @@ struct max_factorial }; +#ifndef __CUDA_ARCH__ + template <> -inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION long double unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(long double)) +inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR_TABLE_FUNCTION long double 
unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(long double)) { #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES constexpr std::array factorials = { { #else - static const boost::array factorials = {{ + BOOST_MATH_GPU_STATIC const boost::array factorials = {{ #endif 1L, 1L, @@ -281,6 +331,7 @@ struct max_factorial BOOST_STATIC_CONSTANT(unsigned, value = 170); }; +#endif #ifdef BOOST_MATH_USE_FLOAT128 template <> @@ -475,6 +526,8 @@ struct max_factorial #endif +#ifndef __CUDA_ARCH__ + template <> inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION double unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(double)) { @@ -488,6 +541,65 @@ struct max_factorial value = ::boost::math::max_factorial::value); }; +#else + +template <> +inline BOOST_GPU_ENABLED double unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(double)) +{ +#ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES + constexpr const double factorials[] = { +#else + BOOST_MATH_GPU_STATIC const double factorials[] = { +#endif + 1, + 1, + 2, + 6, + 24, + 120, + 720, + 5040, + 40320, + 362880.0, + 3628800.0, + 39916800.0, + 479001600.0, + 6227020800.0, + 87178291200.0, + 1307674368000.0, + 20922789888000.0, + 355687428096000.0, + 6402373705728000.0, + 121645100408832000.0, + 0.243290200817664e19, + 0.5109094217170944e20, + 0.112400072777760768e22, + 0.2585201673888497664e23, + 0.62044840173323943936e24, + 0.15511210043330985984e26, + 0.403291461126605635584e27, + 0.10888869450418352160768e29, + 0.304888344611713860501504e30, + 0.8841761993739701954543616e31, + 0.26525285981219105863630848e33, + 0.822283865417792281772556288e34, + 0.26313083693369353016721801216e36, + 0.868331761881188649551819440128e37, + 0.29523279903960414084761860964352e39, + }; + + return factorials[i]; +} + +template <> +struct max_factorial +{ + BOOST_STATIC_CONSTANT(unsigned, + value = 34); +}; + +#endif + #ifndef BOOST_MATH_NO_LEXICAL_CAST template diff --git 
a/include/boost/math/special_functions/ellint_1.hpp b/include/boost/math/special_functions/ellint_1.hpp index d1d9d72e30..c6a09a4390 100644 --- a/include/boost/math/special_functions/ellint_1.hpp +++ b/include/boost/math/special_functions/ellint_1.hpp @@ -31,22 +31,22 @@ namespace boost { namespace math { template -typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol); +BOOST_GPU_ENABLED typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol); namespace detail{ template -T ellint_k_imp(T k, const Policy& pol); +BOOST_GPU_ENABLED T ellint_k_imp(T k, const Policy& pol); // Elliptic integral (Legendre form) of the first kind template -T ellint_f_imp(T phi, T k, const Policy& pol) +BOOST_GPU_ENABLED T ellint_f_imp(T phi, T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; using namespace boost::math::constants; - static const char* function = "boost::math::ellint_f<%1%>(%1%,%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::ellint_f<%1%>(%1%,%1%)"; BOOST_MATH_INSTRUMENT_VARIABLE(phi); BOOST_MATH_INSTRUMENT_VARIABLE(k); BOOST_MATH_INSTRUMENT_VARIABLE(function); @@ -131,12 +131,12 @@ T ellint_f_imp(T phi, T k, const Policy& pol) // Complete elliptic integral (Legendre form) of the first kind template -T ellint_k_imp(T k, const Policy& pol) +BOOST_GPU_ENABLED T ellint_k_imp(T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; - static const char* function = "boost::math::ellint_k<%1%>(%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::ellint_k<%1%>(%1%)"; if (abs(k) > 1) { @@ -157,7 +157,7 @@ T ellint_k_imp(T k, const Policy& pol) } template -inline typename tools::promote_args::type ellint_1(T k, const Policy& pol, const mpl::true_&) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_1(T k, const Policy& pol, const mpl::true_&) { typedef typename tools::promote_args::type result_type; typedef typename 
policies::evaluation::type value_type; @@ -165,7 +165,7 @@ inline typename tools::promote_args::type ellint_1(T k, const Policy& pol, co } template -inline typename tools::promote_args::type ellint_1(T1 k, T2 phi, const mpl::false_&) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_1(T1 k, T2 phi, const mpl::false_&) { return boost::math::ellint_1(k, phi, policies::policy<>()); } @@ -174,14 +174,14 @@ inline typename tools::promote_args::type ellint_1(T1 k, T2 phi, const m // Complete elliptic integral (Legendre form) of the first kind template -inline typename tools::promote_args::type ellint_1(T k) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_1(T k) { return ellint_1(k, policies::policy<>()); } // Elliptic integral (Legendre form) of the first kind template -inline typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -189,7 +189,7 @@ inline typename tools::promote_args::type ellint_1(T1 k, T2 phi, const P } template -inline typename tools::promote_args::type ellint_1(T1 k, T2 phi) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_1(T1 k, T2 phi) { typedef typename policies::is_policy::type tag_type; return detail::ellint_1(k, phi, tag_type()); diff --git a/include/boost/math/special_functions/ellint_2.hpp b/include/boost/math/special_functions/ellint_2.hpp index 9ee6b63821..56a40685e5 100644 --- a/include/boost/math/special_functions/ellint_2.hpp +++ b/include/boost/math/special_functions/ellint_2.hpp @@ -33,16 +33,16 @@ namespace boost { namespace math { template -typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol); +BOOST_GPU_ENABLED typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol); namespace detail{ template 
-T ellint_e_imp(T k, const Policy& pol); +BOOST_GPU_ENABLED T ellint_e_imp(T k, const Policy& pol); // Elliptic integral (Legendre form) of the second kind template -T ellint_e_imp(T phi, T k, const Policy& pol) +BOOST_GPU_ENABLED T ellint_e_imp(T phi, T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -121,7 +121,7 @@ T ellint_e_imp(T phi, T k, const Policy& pol) // Complete elliptic integral (Legendre form) of the second kind template -T ellint_e_imp(T k, const Policy& pol) +BOOST_GPU_ENABLED T ellint_e_imp(T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -146,7 +146,7 @@ T ellint_e_imp(T k, const Policy& pol) } template -inline typename tools::promote_args::type ellint_2(T k, const Policy& pol, const mpl::true_&) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_2(T k, const Policy& pol, const mpl::true_&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -155,7 +155,7 @@ inline typename tools::promote_args::type ellint_2(T k, const Policy& pol, co // Elliptic integral (Legendre form) of the second kind template -inline typename tools::promote_args::type ellint_2(T1 k, T2 phi, const mpl::false_&) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_2(T1 k, T2 phi, const mpl::false_&) { return boost::math::ellint_2(k, phi, policies::policy<>()); } @@ -164,21 +164,21 @@ inline typename tools::promote_args::type ellint_2(T1 k, T2 phi, const m // Complete elliptic integral (Legendre form) of the second kind template -inline typename tools::promote_args::type ellint_2(T k) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_2(T k) { return ellint_2(k, policies::policy<>()); } // Elliptic integral (Legendre form) of the second kind template -inline typename tools::promote_args::type ellint_2(T1 k, T2 phi) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_2(T1 k, T2 
phi) { typedef typename policies::is_policy::type tag_type; return detail::ellint_2(k, phi, tag_type()); } template -inline typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; diff --git a/include/boost/math/special_functions/ellint_3.hpp b/include/boost/math/special_functions/ellint_3.hpp index b8b36729cf..0d0f26a4d7 100644 --- a/include/boost/math/special_functions/ellint_3.hpp +++ b/include/boost/math/special_functions/ellint_3.hpp @@ -38,16 +38,16 @@ namespace boost { namespace math { namespace detail{ template -T ellint_pi_imp(T v, T k, T vc, const Policy& pol); +BOOST_GPU_ENABLED T ellint_pi_imp(T v, T k, T vc, const Policy& pol); // Elliptic integral (Legendre form) of the third kind template -T ellint_pi_imp(T v, T phi, T k, T vc, const Policy& pol) +BOOST_GPU_ENABLED T ellint_pi_imp(T v, T phi, T k, T vc, const Policy& pol) { // Note vc = 1-v presumably without cancellation error. BOOST_MATH_STD_USING - static const char* function = "boost::math::ellint_3<%1%>(%1%,%1%,%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::ellint_3<%1%>(%1%,%1%,%1%)"; if(abs(k) > 1) { @@ -254,8 +254,8 @@ T ellint_pi_imp(T v, T phi, T k, T vc, const Policy& pol) // by the time we get here phi should already have been // normalised above. 
// - BOOST_ASSERT(fabs(phi) < constants::half_pi()); - BOOST_ASSERT(phi >= 0); + BOOST_MATH_ASSERT(fabs(phi) < constants::half_pi()); + BOOST_MATH_ASSERT(phi >= 0); T x, y, z, p, t; T cosp = cos(phi); x = cosp * cosp; @@ -273,13 +273,13 @@ T ellint_pi_imp(T v, T phi, T k, T vc, const Policy& pol) // Complete elliptic integral (Legendre form) of the third kind template -T ellint_pi_imp(T v, T k, T vc, const Policy& pol) +BOOST_GPU_ENABLED T ellint_pi_imp(T v, T k, T vc, const Policy& pol) { // Note arg vc = 1-v, possibly without cancellation errors BOOST_MATH_STD_USING using namespace boost::math::tools; - static const char* function = "boost::math::ellint_pi<%1%>(%1%,%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::ellint_pi<%1%>(%1%,%1%)"; if (abs(k) >= 1) { @@ -323,13 +323,13 @@ T ellint_pi_imp(T v, T k, T vc, const Policy& pol) } template -inline typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 phi, const mpl::false_&) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 phi, const mpl::false_&) { return boost::math::ellint_3(k, v, phi, policies::policy<>()); } template -inline typename tools::promote_args::type ellint_3(T1 k, T2 v, const Policy& pol, const mpl::true_&) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_3(T1 k, T2 v, const Policy& pol, const mpl::true_&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -344,7 +344,7 @@ inline typename tools::promote_args::type ellint_3(T1 k, T2 v, const Pol } // namespace detail template -inline typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 phi, const Policy& pol) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 phi, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -358,14 +358,14 @@ inline typename tools::promote_args::type 
ellint_3(T1 k, T2 v, T3 ph } template -typename detail::ellint_3_result::type ellint_3(T1 k, T2 v, T3 phi) +inline BOOST_GPU_ENABLED typename detail::ellint_3_result::type ellint_3(T1 k, T2 v, T3 phi) { typedef typename policies::is_policy::type tag_type; return detail::ellint_3(k, v, phi, tag_type()); } template -inline typename tools::promote_args::type ellint_3(T1 k, T2 v) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_3(T1 k, T2 v) { return ellint_3(k, v, policies::policy<>()); } diff --git a/include/boost/math/special_functions/ellint_d.hpp b/include/boost/math/special_functions/ellint_d.hpp index fa5c53db18..05afbb6b0a 100644 --- a/include/boost/math/special_functions/ellint_d.hpp +++ b/include/boost/math/special_functions/ellint_d.hpp @@ -33,16 +33,16 @@ namespace boost { namespace math { template -typename tools::promote_args::type ellint_d(T1 k, T2 phi, const Policy& pol); +BOOST_GPU_ENABLED typename tools::promote_args::type ellint_d(T1 k, T2 phi, const Policy& pol); namespace detail{ template -T ellint_d_imp(T k, const Policy& pol); +BOOST_GPU_ENABLED T ellint_d_imp(T k, const Policy& pol); // Elliptic integral (Legendre form) of the second kind template -T ellint_d_imp(T phi, T k, const Policy& pol) +BOOST_GPU_ENABLED T ellint_d_imp(T phi, T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -113,7 +113,7 @@ T ellint_d_imp(T phi, T k, const Policy& pol) // Complete elliptic integral (Legendre form) of the second kind template -T ellint_d_imp(T k, const Policy& pol) +BOOST_GPU_ENABLED T ellint_d_imp(T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -136,7 +136,7 @@ T ellint_d_imp(T k, const Policy& pol) } template -inline typename tools::promote_args::type ellint_d(T k, const Policy& pol, const mpl::true_&) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_d(T k, const Policy& pol, const mpl::true_&) { typedef typename tools::promote_args::type 
result_type; typedef typename policies::evaluation::type value_type; @@ -145,7 +145,7 @@ inline typename tools::promote_args::type ellint_d(T k, const Policy& pol, co // Elliptic integral (Legendre form) of the second kind template -inline typename tools::promote_args::type ellint_d(T1 k, T2 phi, const mpl::false_&) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_d(T1 k, T2 phi, const mpl::false_&) { return boost::math::ellint_d(k, phi, policies::policy<>()); } @@ -154,21 +154,21 @@ inline typename tools::promote_args::type ellint_d(T1 k, T2 phi, const m // Complete elliptic integral (Legendre form) of the second kind template -inline typename tools::promote_args::type ellint_d(T k) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_d(T k) { return ellint_d(k, policies::policy<>()); } // Elliptic integral (Legendre form) of the second kind template -inline typename tools::promote_args::type ellint_d(T1 k, T2 phi) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_d(T1 k, T2 phi) { typedef typename policies::is_policy::type tag_type; return detail::ellint_d(k, phi, tag_type()); } template -inline typename tools::promote_args::type ellint_d(T1 k, T2 phi, const Policy& pol) +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_d(T1 k, T2 phi, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; diff --git a/include/boost/math/special_functions/ellint_rc.hpp b/include/boost/math/special_functions/ellint_rc.hpp index 846c752a14..b6a81a4ea2 100644 --- a/include/boost/math/special_functions/ellint_rc.hpp +++ b/include/boost/math/special_functions/ellint_rc.hpp @@ -32,11 +32,11 @@ namespace boost { namespace math { namespace detail{ template -T ellint_rc_imp(T x, T y, const Policy& pol) +BOOST_GPU_ENABLED T ellint_rc_imp(T x, T y, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = 
"boost::math::ellint_rc<%1%>(%1%,%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::ellint_rc<%1%>(%1%,%1%)"; if(x < 0) { @@ -90,7 +90,7 @@ T ellint_rc_imp(T x, T y, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rc(T1 x, T2 y, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -102,7 +102,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rc(T1 x, T2 y) { return ellint_rc(x, y, policies::policy<>()); diff --git a/include/boost/math/special_functions/ellint_rd.hpp b/include/boost/math/special_functions/ellint_rd.hpp index c08430d545..4faee57dd2 100644 --- a/include/boost/math/special_functions/ellint_rd.hpp +++ b/include/boost/math/special_functions/ellint_rd.hpp @@ -29,12 +29,12 @@ namespace boost { namespace math { namespace detail{ template -T ellint_rd_imp(T x, T y, T z, const Policy& pol) +BOOST_GPU_ENABLED T ellint_rd_imp(T x, T y, T z, const Policy& pol) { BOOST_MATH_STD_USING using std::swap; - static const char* function = "boost::math::ellint_rd<%1%>(%1%,%1%,%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::ellint_rd<%1%>(%1%,%1%,%1%)"; if(x < 0) { @@ -61,7 +61,7 @@ T ellint_rd_imp(T x, T y, T z, const Policy& pol) // using std::swap; if(x == z) - swap(x, y); + BOOST_MATH_CUDA_SAFE_SWAP(x, y); if(y == z) { if(x == y) @@ -74,19 +74,19 @@ T ellint_rd_imp(T x, T y, T z, const Policy& pol) } else { - if((std::min)(x, y) / (std::max)(x, y) > 1.3) + if(BOOST_MATH_CUDA_SAFE_MIN(x, y) / BOOST_MATH_CUDA_SAFE_MAX(x, y) > 1.3) return 3 * (ellint_rc_imp(x, y, pol) - sqrt(x) / y) / (2 * (y - x)); // Otherwise fall through to avoid cancellation in the above (RC(x,y) -> 1/x^0.5 as x -> y) } } if(x == y) { - if((std::min)(x, z) / (std::max)(x, z) > 1.3) + 
if(BOOST_MATH_CUDA_SAFE_MIN(x, z) / BOOST_MATH_CUDA_SAFE_MAX(x, z) > 1.3) return 3 * (ellint_rc_imp(z, x, pol) - 1 / sqrt(z)) / (z - x); // Otherwise fall through to avoid cancellation in the above (RC(x,y) -> 1/x^0.5 as x -> y) } if(y == 0) - swap(x, y); + BOOST_MATH_CUDA_SAFE_SWAP(x, y); if(x == 0) { // @@ -132,7 +132,7 @@ T ellint_rd_imp(T x, T y, T z, const Policy& pol) T An = (x + y + 3 * z) / 5; T A0 = An; // This has an extra 1.2 fudge factor which is really only needed when x, y and z are close in magnitude: - T Q = pow(tools::epsilon() / 4, -T(1) / 8) * (std::max)((std::max)(An - x, An - y), An - z) * 1.2f; + T Q = pow(tools::epsilon() / 4, -T(1) / 8) * BOOST_MATH_CUDA_SAFE_MAX(BOOST_MATH_CUDA_SAFE_MAX(An - x, An - y), An - z) * 1.2f; BOOST_MATH_INSTRUMENT_VARIABLE(Q); T lambda, rx, ry, rz; unsigned k = 0; @@ -181,7 +181,7 @@ T ellint_rd_imp(T x, T y, T z, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rd(T1 x, T2 y, T3 z, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -194,7 +194,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rd(T1 x, T2 y, T3 z) { return ellint_rd(x, y, z, policies::policy<>()); diff --git a/include/boost/math/special_functions/ellint_rf.hpp b/include/boost/math/special_functions/ellint_rf.hpp index a8a7b4b217..26c4824545 100644 --- a/include/boost/math/special_functions/ellint_rf.hpp +++ b/include/boost/math/special_functions/ellint_rf.hpp @@ -23,6 +23,10 @@ #include #include +#ifdef __CUDA_ARCH__ +#include +#endif + // Carlson's elliptic integral of the first kind // R_F(x, y, z) = 0.5 * \int_{0}^{\infty} [(t+x)(t+y)(t+z)]^{-1/2} dt // Carlson, Numerische Mathematik, vol 33, 1 (1979) @@ -30,27 +34,34 @@ namespace boost { namespace math { namespace detail{ template - T 
ellint_rf_imp(T x, T y, T z, const Policy& pol) + BOOST_GPU_ENABLED T ellint_rf_imp(T x, T y, T z, const Policy& pol) { BOOST_MATH_STD_USING - using namespace boost::math; using std::swap; - static const char* function = "boost::math::ellint_rf<%1%>(%1%,%1%,%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::ellint_rf<%1%>(%1%,%1%,%1%)"; if(x < 0 || y < 0 || z < 0) { return policies::raise_domain_error(function, "domain error, all arguments must be non-negative, " "only sensible result is %1%.", +#ifdef __CUDA_ARCH__ + static_cast(CUDART_NAN), pol); +#else std::numeric_limits::quiet_NaN(), pol); +#endif } if(x + y == 0 || y + z == 0 || z + x == 0) { return policies::raise_domain_error(function, "domain error, at most one argument can be zero, " "only sensible result is %1%.", +#ifdef __CUDA_ARCH__ + static_cast(CUDART_NAN), pol); +#else std::numeric_limits::quiet_NaN(), pol); +#endif } // // Special cases from http://dlmf.nist.gov/19.20#i @@ -86,9 +97,9 @@ namespace boost { namespace math { namespace detail{ return ellint_rc_imp(x, y, pol); } if(x == 0) - swap(x, z); + BOOST_MATH_CUDA_SAFE_SWAP(x, z); else if(y == 0) - swap(y, z); + BOOST_MATH_CUDA_SAFE_SWAP(y, z); if(z == 0) { // @@ -111,7 +122,7 @@ namespace boost { namespace math { namespace detail{ T zn = z; T An = (x + y + z) / 3; T A0 = An; - T Q = pow(3 * boost::math::tools::epsilon(), T(-1) / 8) * (std::max)((std::max)(fabs(An - xn), fabs(An - yn)), fabs(An - zn)); + T Q = pow(3 * boost::math::tools::epsilon(), T(-1) / 8) * BOOST_MATH_CUDA_SAFE_MAX(BOOST_MATH_CUDA_SAFE_MAX(fabs(An - xn), fabs(An - yn)), fabs(An - zn)); T fn = 1; @@ -149,7 +160,7 @@ namespace boost { namespace math { namespace detail{ } // namespace detail template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rf(T1 x, T2 y, T3 z, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -162,7 +173,7 @@ inline typename 
tools::promote_args::type } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rf(T1 x, T2 y, T3 z) { return ellint_rf(x, y, z, policies::policy<>()); diff --git a/include/boost/math/special_functions/ellint_rg.hpp b/include/boost/math/special_functions/ellint_rg.hpp index bb5b7c376a..67497a2c43 100644 --- a/include/boost/math/special_functions/ellint_rg.hpp +++ b/include/boost/math/special_functions/ellint_rg.hpp @@ -18,20 +18,28 @@ #include #include +#ifdef __CUDA_ARCH__ +#include +#endif + namespace boost { namespace math { namespace detail{ template - T ellint_rg_imp(T x, T y, T z, const Policy& pol) + BOOST_GPU_ENABLED T ellint_rg_imp(T x, T y, T z, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "boost::math::ellint_rf<%1%>(%1%,%1%,%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::ellint_rf<%1%>(%1%,%1%,%1%)"; if(x < 0 || y < 0 || z < 0) { return policies::raise_domain_error(function, "domain error, all arguments must be non-negative, " "only sensible result is %1%.", +#ifdef __CUDA_ARCH__ + static_cast(CUDART_NAN), pol); +#else std::numeric_limits::quiet_NaN(), pol); +#endif } // // Function is symmetric in x, y and z, but we require @@ -40,14 +48,14 @@ namespace boost { namespace math { namespace detail{ // using std::swap; if(x < y) - swap(x, y); + BOOST_MATH_CUDA_SAFE_SWAP(x, y); if(x < z) - swap(x, z); + BOOST_MATH_CUDA_SAFE_SWAP(x, z); if(y > z) - swap(y, z); + BOOST_MATH_CUDA_SAFE_SWAP(y, z); - BOOST_ASSERT(x >= z); - BOOST_ASSERT(z >= y); + BOOST_MATH_ASSERT(x >= z); + BOOST_MATH_ASSERT(z >= y); // // Special cases from http://dlmf.nist.gov/19.20#ii // @@ -67,7 +75,7 @@ namespace boost { namespace math { namespace detail{ else { // x = z, y != 0 - swap(x, y); + BOOST_MATH_CUDA_SAFE_SWAP(x, y); return (x == 0) ? 
T(sqrt(z) / 2) : T((z * ellint_rc_imp(x, z, pol) + sqrt(x)) / 2); } } @@ -80,7 +88,7 @@ namespace boost { namespace math { namespace detail{ } else if(y == 0) { - swap(y, z); + BOOST_MATH_CUDA_SAFE_SWAP(y, z); // // Special handling for common case, from // Numerical Computation of Real or Complex Elliptic Integrals, eq.46 @@ -111,7 +119,7 @@ namespace boost { namespace math { namespace detail{ } // namespace detail template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rg(T1 x, T2 y, T3 z, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -124,7 +132,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rg(T1 x, T2 y, T3 z) { return ellint_rg(x, y, z, policies::policy<>()); diff --git a/include/boost/math/special_functions/ellint_rj.hpp b/include/boost/math/special_functions/ellint_rj.hpp index ac39bed678..bfded1a004 100644 --- a/include/boost/math/special_functions/ellint_rj.hpp +++ b/include/boost/math/special_functions/ellint_rj.hpp @@ -25,6 +25,10 @@ #include #include +#ifdef __CUDA_ARCH__ +#include +#endif + // Carlson's elliptic integral of the third kind // R_J(x, y, z, p) = 1.5 * \int_{0}^{\infty} (t+p)^{-1} [(t+x)(t+y)(t+z)]^{-1/2} dt // Carlson, Numerische Mathematik, vol 33, 1 (1979) @@ -32,13 +36,13 @@ namespace boost { namespace math { namespace detail{ template -T ellint_rc1p_imp(T y, const Policy& pol) +BOOST_GPU_ENABLED T ellint_rc1p_imp(T y, const Policy& pol) { using namespace boost::math; // Calculate RC(1, 1 + x) BOOST_MATH_STD_USING - static const char* function = "boost::math::ellint_rc<%1%>(%1%,%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::ellint_rc<%1%>(%1%,%1%)"; if(y == -1) { @@ -76,11 +80,11 @@ T ellint_rc1p_imp(T y, const Policy& pol) } template -T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) 
+BOOST_GPU_ENABLED T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "boost::math::ellint_rj<%1%>(%1%,%1%,%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::ellint_rj<%1%>(%1%,%1%,%1%)"; if(x < 0) { @@ -106,7 +110,12 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) { return policies::raise_domain_error(function, "At most one argument can be zero, " - "only possible result is %1%.", std::numeric_limits::quiet_NaN(), pol); + "only possible result is %1%.", +#ifdef __CUDA_ARCH__ + static_cast(CUDART_NAN), pol); +#else + std::numeric_limits::quiet_NaN(), pol); +#endif } // for p < 0, the integral is singular, return Cauchy principal value @@ -117,20 +126,21 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) // Since the integral is symmetrical in x, y and z // we can just permute the values: // + using std::swap; if(x > y) - std::swap(x, y); + BOOST_MATH_CUDA_SAFE_SWAP(x, y); if(y > z) - std::swap(y, z); + BOOST_MATH_CUDA_SAFE_SWAP(y, z); if(x > y) - std::swap(x, y); + BOOST_MATH_CUDA_SAFE_SWAP(x, y); - BOOST_ASSERT(x <= y); - BOOST_ASSERT(y <= z); + BOOST_MATH_ASSERT(x <= y); + BOOST_MATH_ASSERT(y <= z); T q = -p; p = (z * (x + y + q) - x * y) / (z + q); - BOOST_ASSERT(p >= 0); + BOOST_MATH_ASSERT(p >= 0); T value = (p - z) * ellint_rj_imp(x, y, z, p, pol); value -= 3 * ellint_rf_imp(x, y, z, pol); @@ -161,12 +171,12 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) { // x = y only, permute so y = z: using std::swap; - swap(x, z); + BOOST_MATH_CUDA_SAFE_SWAP(x, z); if(y == p) { return ellint_rd_imp(x, y, y, pol); } - else if((std::max)(y, p) / (std::min)(y, p) > 1.2) + else if(BOOST_MATH_CUDA_SAFE_MAX(y, p) / BOOST_MATH_CUDA_SAFE_MIN(y, p) > 1.2) { return 3 * (ellint_rc_imp(x, y, pol) - ellint_rc_imp(x, p, pol)) / (p - y); } @@ -180,7 +190,7 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) // y = z = p: return ellint_rd_imp(x, y, y, pol); } - else 
if((std::max)(y, p) / (std::min)(y, p) > 1.2) + else if(BOOST_MATH_CUDA_SAFE_MAX(y, p) / BOOST_MATH_CUDA_SAFE_MIN(y, p) > 1.2) { // y = z: return 3 * (ellint_rc_imp(x, y, pol) - ellint_rc_imp(x, p, pol)) / (p - y); @@ -199,7 +209,7 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) T An = (x + y + z + 2 * p) / 5; T A0 = An; T delta = (p - x) * (p - y) * (p - z); - T Q = pow(tools::epsilon() / 5, -T(1) / 8) * (std::max)((std::max)(fabs(An - x), fabs(An - y)), (std::max)(fabs(An - z), fabs(An - p))); + T Q = pow(tools::epsilon() / 5, -T(1) / 8) * BOOST_MATH_CUDA_SAFE_MAX(BOOST_MATH_CUDA_SAFE_MAX(fabs(An - x), fabs(An - y)), BOOST_MATH_CUDA_SAFE_MAX(fabs(An - z), fabs(An - p))); unsigned n; T lambda; @@ -275,7 +285,7 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rj(T1 x, T2 y, T3 z, T4 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -290,7 +300,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rj(T1 x, T2 y, T3 z, T4 p) { return ellint_rj(x, y, z, p, policies::policy<>()); diff --git a/include/boost/math/special_functions/erf.hpp b/include/boost/math/special_functions/erf.hpp index eda223b5d2..c61d4b75c2 100644 --- a/include/boost/math/special_functions/erf.hpp +++ b/include/boost/math/special_functions/erf.hpp @@ -28,7 +28,7 @@ namespace detail template struct erf_asympt_series_t { - erf_asympt_series_t(T z) : xx(2 * -z * z), tk(1) + BOOST_GPU_ENABLED erf_asympt_series_t(T z) : xx(2 * -z * z), tk(1) { BOOST_MATH_STD_USING result = -exp(-z * z) / sqrt(boost::math::constants::pi()); @@ -37,7 +37,7 @@ struct erf_asympt_series_t typedef T result_type; - T operator()() + BOOST_GPU_ENABLED T operator()() { BOOST_MATH_STD_USING T r = result; @@ -56,33 +56,33 @@ 
struct erf_asympt_series_t // How large z has to be in order to ensure that the series converges: // template -inline float erf_asymptotic_limit_N(const T&) +inline BOOST_GPU_ENABLED float erf_asymptotic_limit_N(const T&) { return (std::numeric_limits::max)(); } -inline float erf_asymptotic_limit_N(const mpl::int_<24>&) +inline BOOST_GPU_ENABLED float erf_asymptotic_limit_N(const mpl::int_<24>&) { return 2.8F; } -inline float erf_asymptotic_limit_N(const mpl::int_<53>&) +inline BOOST_GPU_ENABLED float erf_asymptotic_limit_N(const mpl::int_<53>&) { return 4.3F; } -inline float erf_asymptotic_limit_N(const mpl::int_<64>&) +inline BOOST_GPU_ENABLED float erf_asymptotic_limit_N(const mpl::int_<64>&) { return 4.8F; } -inline float erf_asymptotic_limit_N(const mpl::int_<106>&) +inline BOOST_GPU_ENABLED float erf_asymptotic_limit_N(const mpl::int_<106>&) { return 6.5F; } -inline float erf_asymptotic_limit_N(const mpl::int_<113>&) +inline BOOST_GPU_ENABLED float erf_asymptotic_limit_N(const mpl::int_<113>&) { return 6.8F; } template -inline T erf_asymptotic_limit() +inline BOOST_GPU_ENABLED T erf_asymptotic_limit() { typedef typename policies::precision::type precision_type; typedef typename mpl::if_< @@ -176,20 +176,36 @@ T erf_imp(T z, bool invert, const Policy& pol, const Tag& t) } template -T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<53>& t) +BOOST_GPU_ENABLED T erf_imp(T z, bool invert, const Policy&, const mpl::int_<53>&) { BOOST_MATH_STD_USING BOOST_MATH_INSTRUMENT_CODE("53-bit precision erf_imp called"); + int prefix_multiplier = 1; + int prefix_adder = 0; + if(z < 0) { + // Recursion is logically simpler here, but confuses static analyzers that need to be + // able to calculate the maximum program stack size at compile time (ie CUDA).
+ z = -z; if(!invert) - return -erf_imp(T(-z), invert, pol, t); + { + prefix_multiplier = -1; + // return -erf_imp(T(-z), invert, pol, t); + } else if(z < -0.5) - return 2 - erf_imp(T(-z), invert, pol, t); + { + prefix_adder = 2; + // return 2 - erf_imp(T(-z), invert, pol, t); + } else - return 1 + erf_imp(T(-z), false, pol, t); + { + invert = false; + prefix_adder = 1; + // return 1 + erf_imp(T(-z), false, pol, t); + } } T result; @@ -212,7 +228,7 @@ T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<53>& t) } else { - static const T c = BOOST_MATH_BIG_CONSTANT(T, 53, 0.003379167095512573896158903121545171688); + BOOST_MATH_GPU_STATIC const T c = BOOST_MATH_BIG_CONSTANT(T, 53, 0.003379167095512573896158903121545171688); result = static_cast(z * 1.125f + z * c); } } @@ -223,15 +239,15 @@ T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<53>& t) // Maximum Relative Change in Control Points: 1.155e-04 // Max Error found at double precision = 2.961182e-17 - static const T Y = 1.044948577880859375f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const T Y = 1.044948577880859375f; + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 0.0834305892146531832907), BOOST_MATH_BIG_CONSTANT(T, 53, -0.338165134459360935041), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0509990735146777432841), BOOST_MATH_BIG_CONSTANT(T, 53, -0.00772758345802133288487), BOOST_MATH_BIG_CONSTANT(T, 53, -0.000322780120964605683831), }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 0.455004033050794024546), BOOST_MATH_BIG_CONSTANT(T, 53, 0.0875222600142252549554), @@ -254,8 +270,8 @@ T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<53>& t) // Expected Error Term: 3.702e-17 // Maximum Relative Change in Control Points: 2.845e-04 // Max Error found at double precision = 4.841816e-17 - static const T Y = 0.405935764312744140625f; - static const T P[] = { + 
BOOST_MATH_GPU_STATIC const T Y = 0.405935764312744140625f; + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 53, -0.098090592216281240205), BOOST_MATH_BIG_CONSTANT(T, 53, 0.178114665841120341155), BOOST_MATH_BIG_CONSTANT(T, 53, 0.191003695796775433986), @@ -263,7 +279,7 @@ T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<53>& t) BOOST_MATH_BIG_CONSTANT(T, 53, 0.0195049001251218801359), BOOST_MATH_BIG_CONSTANT(T, 53, 0.00180424538297014223957), }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 1.84759070983002217845), BOOST_MATH_BIG_CONSTANT(T, 53, 1.42628004845511324508), @@ -287,8 +303,8 @@ T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<53>& t) // Maximum Deviation Found: 3.909e-18 // Expected Error Term: 3.909e-18 // Maximum Relative Change in Control Points: 9.886e-05 - static const T Y = 0.50672817230224609375f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const T Y = 0.50672817230224609375f; + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 53, -0.0243500476207698441272), BOOST_MATH_BIG_CONSTANT(T, 53, 0.0386540375035707201728), BOOST_MATH_BIG_CONSTANT(T, 53, 0.04394818964209516296), @@ -296,7 +312,7 @@ T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<53>& t) BOOST_MATH_BIG_CONSTANT(T, 53, 0.00323962406290842133584), BOOST_MATH_BIG_CONSTANT(T, 53, 0.000235839115596880717416), }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 1.53991494948552447182), BOOST_MATH_BIG_CONSTANT(T, 53, 0.982403709157920235114), @@ -320,8 +336,8 @@ T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<53>& t) // Expected Error Term: 1.512e-17 // Maximum Relative Change in Control Points: 2.222e-04 // Max Error found at double precision = 2.062515e-17 - static const T Y = 0.5405750274658203125f; - static const T P[] = { + 
BOOST_MATH_GPU_STATIC const T Y = 0.5405750274658203125f; + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 0.00295276716530971662634), BOOST_MATH_BIG_CONSTANT(T, 53, 0.0137384425896355332126), BOOST_MATH_BIG_CONSTANT(T, 53, 0.00840807615555585383007), @@ -329,7 +345,7 @@ T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<53>& t) BOOST_MATH_BIG_CONSTANT(T, 53, 0.000250269961544794627958), BOOST_MATH_BIG_CONSTANT(T, 53, 0.113212406648847561139e-4), }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 1.04217814166938418171), BOOST_MATH_BIG_CONSTANT(T, 53, 0.442597659481563127003), @@ -353,8 +369,8 @@ T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<53>& t) // Maximum Deviation Found: 2.860e-17 // Expected Error Term: 2.859e-17 // Maximum Relative Change in Control Points: 1.357e-05 - static const T Y = 0.5579090118408203125f; - static const T P[] = { + BOOST_MATH_GPU_STATIC const T Y = 0.5579090118408203125f; + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 0.00628057170626964891937), BOOST_MATH_BIG_CONSTANT(T, 53, 0.0175389834052493308818), BOOST_MATH_BIG_CONSTANT(T, 53, -0.212652252872804219852), @@ -363,7 +379,7 @@ T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<53>& t) BOOST_MATH_BIG_CONSTANT(T, 53, -3.22729451764143718517), BOOST_MATH_BIG_CONSTANT(T, 53, -2.8175401114513378771), }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 2.79257750980575282228), BOOST_MATH_BIG_CONSTANT(T, 53, 11.0567237927800161565), @@ -394,15 +410,16 @@ T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<53>& t) if(invert) { - result = 1 - result; + prefix_adder += prefix_multiplier * 1; + prefix_multiplier = -prefix_multiplier; } - return result; + return prefix_adder + prefix_multiplier * result; } // template T erf_imp(T z, 
bool invert, const Lanczos& l, const mpl::int_<53>& t) template -T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<64>& t) +BOOST_GPU_ENABLED T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<64>& t) { BOOST_MATH_STD_USING @@ -636,7 +653,7 @@ T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<64>& t) template -T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<113>& t) +BOOST_GPU_ENABLED T erf_imp(T z, bool invert, const Policy& pol, const mpl::int_<113>& t) { BOOST_MATH_STD_USING @@ -1146,9 +1163,11 @@ struct erf_initializer void force_instantiate()const{} }; static const init initializer; - static void force_instantiate() + static BOOST_GPU_ENABLED void force_instantiate() { +#ifndef __CUDA_ARCH__ initializer.force_instantiate(); +#endif } }; @@ -1158,7 +1177,7 @@ const typename erf_initializer::init erf_initializer -inline typename tools::promote_args::type erf(T z, const Policy& /* pol */) +inline BOOST_GPU_ENABLED typename tools::promote_args::type erf(T z, const Policy& /* pol */) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -1204,7 +1223,7 @@ inline typename tools::promote_args::type erf(T z, const Policy& /* pol */) } template -inline typename tools::promote_args::type erfc(T z, const Policy& /* pol */) +inline BOOST_GPU_ENABLED typename tools::promote_args::type erfc(T z, const Policy& /* pol */) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -1250,13 +1269,13 @@ inline typename tools::promote_args::type erfc(T z, const Policy& /* pol */) } template -inline typename tools::promote_args::type erf(T z) +inline BOOST_GPU_ENABLED typename tools::promote_args::type erf(T z) { return boost::math::erf(z, policies::policy<>()); } template -inline typename tools::promote_args::type erfc(T z) +inline BOOST_GPU_ENABLED typename tools::promote_args::type erfc(T z) { return 
boost::math::erfc(z, policies::policy<>()); } diff --git a/include/boost/math/special_functions/expint.hpp b/include/boost/math/special_functions/expint.hpp index fc656f3355..da6c7796fa 100644 --- a/include/boost/math/special_functions/expint.hpp +++ b/include/boost/math/special_functions/expint.hpp @@ -34,7 +34,7 @@ template inline T expint_1_rational(const T& z, const mpl::int_<0>&) { // this function is never actually called - BOOST_ASSERT(0); + BOOST_MATH_ASSERT(0); return z; } diff --git a/include/boost/math/special_functions/expm1.hpp b/include/boost/math/special_functions/expm1.hpp index 9cecc89756..c65d7b9b4b 100644 --- a/include/boost/math/special_functions/expm1.hpp +++ b/include/boost/math/special_functions/expm1.hpp @@ -41,10 +41,10 @@ namespace detail { typedef T result_type; - expm1_series(T x) + BOOST_GPU_ENABLED expm1_series(T x) : k(0), m_x(x), m_term(1) {} - T operator()() + BOOST_GPU_ENABLED T operator()() { ++k; m_term *= m_x; @@ -52,7 +52,7 @@ namespace detail return m_term; } - int count()const + BOOST_GPU_ENABLED int count()const { return k; } @@ -87,9 +87,11 @@ struct expm1_initializer void force_instantiate()const{} }; static const init initializer; - static void force_instantiate() + static BOOST_GPU_ENABLED void force_instantiate() { +#ifndef __CUDA_ARCH__ initializer.force_instantiate(); +#endif } }; @@ -102,7 +104,7 @@ const typename expm1_initializer::init expm1_initializer |x| > epsilon. 
// template -T expm1_imp(T x, const mpl::int_<0>&, const Policy& pol) +BOOST_GPU_ENABLED T expm1_imp(T x, const mpl::int_<0>&, const Policy& pol) { BOOST_MATH_STD_USING @@ -136,7 +138,7 @@ T expm1_imp(T x, const mpl::int_<0>&, const Policy& pol) } template -T expm1_imp(T x, const mpl::int_<53>&, const P& pol) +BOOST_GPU_ENABLED T expm1_imp(T x, const mpl::int_<53>&, const P& pol) { BOOST_MATH_STD_USING @@ -154,16 +156,16 @@ T expm1_imp(T x, const mpl::int_<53>&, const P& pol) if(a < tools::epsilon()) return x; - static const float Y = 0.10281276702880859e1f; - static const T n[] = { static_cast(-0.28127670288085937e-1), static_cast(0.51278186299064534e0), static_cast(-0.6310029069350198e-1), static_cast(0.11638457975729296e-1), static_cast(-0.52143390687521003e-3), static_cast(0.21491399776965688e-4) }; - static const T d[] = { 1, static_cast(-0.45442309511354755e0), static_cast(0.90850389570911714e-1), static_cast(-0.10088963629815502e-1), static_cast(0.63003407478692265e-3), static_cast(-0.17976570003654402e-4) }; + BOOST_MATH_GPU_STATIC const float Y = 0.10281276702880859e1f; + BOOST_MATH_GPU_STATIC const T n[] = { static_cast(-0.28127670288085937e-1), static_cast(0.51278186299064534e0), static_cast(-0.6310029069350198e-1), static_cast(0.11638457975729296e-1), static_cast(-0.52143390687521003e-3), static_cast(0.21491399776965688e-4) }; + BOOST_MATH_GPU_STATIC const T d[] = { 1, static_cast(-0.45442309511354755e0), static_cast(0.90850389570911714e-1), static_cast(-0.10088963629815502e-1), static_cast(0.63003407478692265e-3), static_cast(-0.17976570003654402e-4) }; T result = x * Y + x * tools::evaluate_polynomial(n, x) / tools::evaluate_polynomial(d, x); return result; } template -T expm1_imp(T x, const mpl::int_<64>&, const P& pol) +BOOST_GPU_ENABLED T expm1_imp(T x, const mpl::int_<64>&, const P& pol) { BOOST_MATH_STD_USING @@ -181,8 +183,8 @@ T expm1_imp(T x, const mpl::int_<64>&, const P& pol) if(a < tools::epsilon()) return x; - static const float Y = 
0.10281276702880859375e1f; - static const T n[] = { + BOOST_MATH_GPU_STATIC const float Y = 0.10281276702880859375e1f; + BOOST_MATH_GPU_STATIC const T n[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.281276702880859375e-1), BOOST_MATH_BIG_CONSTANT(T, 64, 0.512980290285154286358e0), BOOST_MATH_BIG_CONSTANT(T, 64, -0.667758794592881019644e-1), @@ -191,7 +193,7 @@ T expm1_imp(T x, const mpl::int_<64>&, const P& pol) BOOST_MATH_BIG_CONSTANT(T, 64, 0.447441185192951335042e-4), BOOST_MATH_BIG_CONSTANT(T, 64, -0.714539134024984593011e-6) }; - static const T d[] = { + BOOST_MATH_GPU_STATIC const T d[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, -0.461477618025562520389e0), BOOST_MATH_BIG_CONSTANT(T, 64, 0.961237488025708540713e-1), @@ -206,7 +208,7 @@ T expm1_imp(T x, const mpl::int_<64>&, const P& pol) } template -T expm1_imp(T x, const mpl::int_<113>&, const P& pol) +BOOST_GPU_ENABLED T expm1_imp(T x, const mpl::int_<113>&, const P& pol) { BOOST_MATH_STD_USING @@ -224,8 +226,8 @@ T expm1_imp(T x, const mpl::int_<113>&, const P& pol) if(a < tools::epsilon()) return x; - static const float Y = 0.10281276702880859375e1f; - static const T n[] = { + BOOST_MATH_GPU_STATIC const float Y = 0.10281276702880859375e1f; + BOOST_MATH_GPU_STATIC const T n[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -0.28127670288085937499999999999999999854e-1), BOOST_MATH_BIG_CONSTANT(T, 113, 0.51278156911210477556524452177540792214e0), BOOST_MATH_BIG_CONSTANT(T, 113, -0.63263178520747096729500254678819588223e-1), @@ -237,7 +239,7 @@ T expm1_imp(T x, const mpl::int_<113>&, const P& pol) BOOST_MATH_BIG_CONSTANT(T, 113, -0.15995603306536496772374181066765665596e-8), BOOST_MATH_BIG_CONSTANT(T, 113, 0.45261820069007790520447958280473183582e-10) }; - static const T d[] = { + BOOST_MATH_GPU_STATIC const T d[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.0), BOOST_MATH_BIG_CONSTANT(T, 113, -0.45441264709074310514348137469214538853e0), BOOST_MATH_BIG_CONSTANT(T, 113, 
0.96827131936192217313133611655555298106e-1), @@ -258,7 +260,7 @@ T expm1_imp(T x, const mpl::int_<113>&, const P& pol) } // namespace detail template -inline typename tools::promote_args::type expm1(T x, const Policy& /* pol */) +inline BOOST_GPU_ENABLED typename tools::promote_args::type expm1(T x, const Policy& /* pol */) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -304,18 +306,18 @@ inline typename tools::promote_args::type expm1(T x, const Policy& /* pol */) #if defined(BOOST_HAS_EXPM1) && !(defined(__osf__) && defined(__DECCXX_VER)) # ifdef BOOST_MATH_USE_C99 -inline float expm1(float x, const policies::policy<>&){ return ::expm1f(x); } +inline BOOST_GPU_ENABLED float expm1(float x, const policies::policy<>&){ return ::expm1f(x); } # ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS -inline long double expm1(long double x, const policies::policy<>&){ return ::expm1l(x); } +inline BOOST_GPU_ENABLED long double expm1(long double x, const policies::policy<>&){ return ::expm1l(x); } # endif # else -inline float expm1(float x, const policies::policy<>&){ return static_cast(::expm1(x)); } +inline BOOST_GPU_ENABLED float expm1(float x, const policies::policy<>&){ return static_cast(::expm1(x)); } # endif -inline double expm1(double x, const policies::policy<>&){ return ::expm1(x); } +inline BOOST_GPU_ENABLED double expm1(double x, const policies::policy<>&){ return ::expm1(x); } #endif template -inline typename tools::promote_args::type expm1(T x) +inline BOOST_GPU_ENABLED typename tools::promote_args::type expm1(T x) { return expm1(x, policies::policy<>()); } diff --git a/include/boost/math/special_functions/factorials.hpp b/include/boost/math/special_functions/factorials.hpp index e36a098bb6..92cf55ffea 100644 --- a/include/boost/math/special_functions/factorials.hpp +++ b/include/boost/math/special_functions/factorials.hpp @@ -27,7 +27,7 @@ namespace boost { namespace math { template -inline T 
factorial(unsigned i, const Policy& pol) +inline BOOST_GPU_ENABLED T factorial(unsigned i, const Policy& pol) { BOOST_STATIC_ASSERT(!boost::is_integral::value); // factorial(n) is not implemented @@ -48,7 +48,7 @@ inline T factorial(unsigned i, const Policy& pol) } template -inline T factorial(unsigned i) +inline BOOST_GPU_ENABLED T factorial(unsigned i) { return factorial(i, policies::policy<>()); } @@ -71,7 +71,7 @@ inline double factorial(unsigned i) } */ template -T double_factorial(unsigned i, const Policy& pol) +BOOST_GPU_ENABLED T double_factorial(unsigned i, const Policy& pol) { BOOST_STATIC_ASSERT(!boost::is_integral::value); BOOST_MATH_STD_USING // ADL lookup of std names @@ -106,7 +106,7 @@ T double_factorial(unsigned i, const Policy& pol) } template -inline T double_factorial(unsigned i) +inline BOOST_GPU_ENABLED T double_factorial(unsigned i) { return double_factorial(i, policies::policy<>()); } @@ -114,7 +114,7 @@ inline T double_factorial(unsigned i) namespace detail{ template -T rising_factorial_imp(T x, int n, const Policy& pol) +BOOST_GPU_ENABLED T rising_factorial_imp(T x, int n, const Policy& pol) { BOOST_STATIC_ASSERT(!boost::is_integral::value); if(x < 0) @@ -162,11 +162,11 @@ T rising_factorial_imp(T x, int n, const Policy& pol) } template -inline T falling_factorial_imp(T x, unsigned n, const Policy& pol) +inline BOOST_GPU_ENABLED T falling_factorial_imp(T x, unsigned n, const Policy& pol) { BOOST_STATIC_ASSERT(!boost::is_integral::value); BOOST_MATH_STD_USING // ADL of std names - if((x == 0) && (n >= 0)) + if(x == 0) return 0; if(x < 0) { @@ -226,7 +226,7 @@ inline T falling_factorial_imp(T x, unsigned n, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type falling_factorial(RT x, unsigned n) { typedef typename tools::promote_args::type result_type; @@ -235,7 +235,7 @@ inline typename tools::promote_args::type } template -inline typename 
tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type falling_factorial(RT x, unsigned n, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -244,7 +244,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type rising_factorial(RT x, int n) { typedef typename tools::promote_args::type result_type; @@ -253,7 +253,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type rising_factorial(RT x, int n, const Policy& pol) { typedef typename tools::promote_args::type result_type; diff --git a/include/boost/math/special_functions/fpclassify.hpp b/include/boost/math/special_functions/fpclassify.hpp index d83e111c48..b93f76efe6 100644 --- a/include/boost/math/special_functions/fpclassify.hpp +++ b/include/boost/math/special_functions/fpclassify.hpp @@ -18,6 +18,11 @@ #include #include #include + +#ifdef __CUDA_ARCH__ +#include +#endif + /*! \file fpclassify.hpp \brief Classify floating-point value as normal, subnormal, zero, infinite, or NaN. 
@@ -633,6 +638,78 @@ inline bool (isnan)(__float128 x) } #endif +#ifdef __CUDA_ARCH__ + +template<> inline BOOST_GPU_ENABLED bool (isfinite)(float x) { return ::isfinite(x); } +template<> inline BOOST_GPU_ENABLED bool (isfinite)(double x) { return ::isfinite(x); } + +template<> inline BOOST_GPU_ENABLED bool (isnan)(float x) { return ::isnan(x); } +template<> inline BOOST_GPU_ENABLED bool (isnan)(double x) { return ::isnan(x); } + +template<> inline BOOST_GPU_ENABLED bool (isinf)(float x) { return ::isinf(x); } +template<> inline BOOST_GPU_ENABLED bool (isinf)(double x) { return ::isinf(x); } + +template<> inline BOOST_GPU_ENABLED bool (isnormal)(float x) +{ + if(x < 0) x = -x; + return (x >= FLT_MIN) && (x <= FLT_MAX); +} +template<> inline BOOST_GPU_ENABLED bool (isnormal)(double x) +{ + if(x < 0) x = -x; + return (x >= DBL_MIN) && (x <= DBL_MAX); +} + +template<> inline BOOST_GPU_ENABLED int (fpclassify)(float t) +{ + if((boost::math::isnan)(t)) + return FP_NAN; + // std::fabs broken on a few systems especially for long long!!!! + float at = (t < 0.0f) ? -t : t; + + // Use a process of exclusion to figure out + // what kind of type we have, this relies on + // IEEE conforming reals that will treat + // Nan's as unordered. Some compilers + // don't do this once optimisations are + // turned on, hence the check for nan's above. + if(at <= FLT_MAX) + { + if(at >= FLT_MIN) + return FP_NORMAL; + return (at != 0) ? FP_SUBNORMAL : FP_ZERO; + } + else if(at > FLT_MAX) + return FP_INFINITE; + return FP_NAN; +} + +template<> inline BOOST_GPU_ENABLED int (fpclassify)(double t) +{ + if((boost::math::isnan)(t)) + return FP_NAN; + // std::fabs broken on a few systems especially for long long!!!! + double at = (t < 0.0) ? -t : t; + + // Use a process of exclusion to figure out + // what kind of type we have, this relies on + // IEEE conforming reals that will treat + // Nan's as unordered. 
Some compilers + // don't do this once optimisations are + // turned on, hence the check for nan's above. + if(at <= DBL_MAX) + { + if(at >= DBL_MIN) + return FP_NORMAL; + return (at != 0) ? FP_SUBNORMAL : FP_ZERO; + } + else if(at > DBL_MAX) + return FP_INFINITE; + return FP_NAN; +} + +#endif + } // namespace math } // namespace boost diff --git a/include/boost/math/special_functions/gamma.hpp b/include/boost/math/special_functions/gamma.hpp index 1903b997cb..1203f33d46 100644 --- a/include/boost/math/special_functions/gamma.hpp +++ b/include/boost/math/special_functions/gamma.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -59,13 +60,13 @@ namespace boost{ namespace math{ namespace detail{ template -inline bool is_odd(T v, const boost::true_type&) +inline BOOST_GPU_ENABLED bool is_odd(T v, const boost::true_type&) { int i = static_cast(v); return i&1; } template -inline bool is_odd(T v, const boost::false_type&) +inline BOOST_GPU_ENABLED bool is_odd(T v, const boost::false_type&) { // Oh dear can't cast T to int! BOOST_MATH_STD_USING @@ -73,13 +74,13 @@ inline bool is_odd(T v, const boost::false_type&) return static_cast(modulus != 0); } template -inline bool is_odd(T v) +inline BOOST_GPU_ENABLED bool is_odd(T v) { return is_odd(v, ::boost::is_convertible()); } template -T sinpx(T z) +BOOST_GPU_ENABLED T sinpx(T z) { // Ad hoc function calculates x * sin(pi * x), // taking extra care near when x is near a whole number. 
@@ -101,7 +102,7 @@ T sinpx(T z) { dist = z - fl; } - BOOST_ASSERT(fl >= 0); + BOOST_MATH_ASSERT(fl >= 0); if(dist > 0.5) dist = 1 - dist; T result = sin(dist*boost::math::constants::pi()); @@ -111,57 +112,19 @@ T sinpx(T z) // tgamma(z), with Lanczos support: // template -T gamma_imp(T z, const Policy& pol, const Lanczos& l) +BOOST_GPU_ENABLED T gamma_positive_imp(T z, const Policy& pol, const Lanczos& l, const char* function) { BOOST_MATH_STD_USING - T result = 1; - -#ifdef BOOST_MATH_INSTRUMENT - static bool b = false; - if(!b) - { - std::cout << "tgamma_imp called with " << typeid(z).name() << " " << typeid(l).name() << std::endl; - b = true; - } -#endif - static const char* function = "boost::math::tgamma<%1%>(%1%)"; - - if(z <= 0) - { - if(floor(z) == z) - return policies::raise_pole_error(function, "Evaluation of tgamma at a negative integer %1%.", z, pol); - if(z <= -20) - { - result = gamma_imp(T(-z), pol, l) * sinpx(z); - BOOST_MATH_INSTRUMENT_VARIABLE(result); - if((fabs(result) < 1) && (tools::max_value() * fabs(result) < boost::math::constants::pi())) - return -boost::math::sign(result) * policies::raise_overflow_error(function, "Result of tgamma is too large to represent.", pol); - result = -boost::math::constants::pi() / result; - if(result == 0) - return policies::raise_underflow_error(function, "Result of tgamma is too small to represent.", pol); - if((boost::math::fpclassify)(result) == (int)FP_SUBNORMAL) - return policies::raise_denorm_error(function, "Result of tgamma is denormalized.", result, pol); - BOOST_MATH_INSTRUMENT_VARIABLE(result); - return result; - } - - // shift z to > 1: - while(z < 0) - { - result /= z; - z += 1; - } - } BOOST_MATH_INSTRUMENT_VARIABLE(result); if((floor(z) == z) && (z < max_factorial::value)) { result *= unchecked_factorial(itrunc(z, pol) - 1); BOOST_MATH_INSTRUMENT_VARIABLE(result); } - else if (z < tools::root_epsilon()) + else if(z < tools::root_epsilon()) { - if (z < 1 / tools::max_value()) + if(z < 1 / 
tools::max_value()) result = policies::raise_overflow_error(function, 0, pol); result *= 1 / z - constants::euler(); } @@ -198,14 +161,60 @@ T gamma_imp(T z, const Policy& pol, const Lanczos& l) } return result; } + +template +BOOST_GPU_ENABLED T gamma_imp(T z, const Policy& pol, const Lanczos& l) +{ + BOOST_MATH_STD_USING + + T result = 1; + +#ifdef BOOST_MATH_INSTRUMENT + BOOST_MATH_GPU_STATIC bool b = false; + if(!b) + { + std::cout << "tgamma_imp called with " << typeid(z).name() << " " << typeid(l).name() << std::endl; + b = true; + } +#endif + BOOST_MATH_GPU_STATIC const char* function = "boost::math::tgamma<%1%>(%1%)"; + + if(z <= 0) + { + if(floor(z) == z) + return policies::raise_pole_error(function, "Evaluation of tgamma at a negative integer %1%.", z, pol); + if(z <= -20) + { + result = gamma_positive_imp(T(-z), pol, l, function); + result *= sinpx(z); + BOOST_MATH_INSTRUMENT_VARIABLE(result); + if((fabs(result) < 1) && (tools::max_value() * fabs(result) < boost::math::constants::pi())) + return -boost::math::sign(result) * policies::raise_overflow_error(function, "Result of tgamma is too large to represent.", pol); + result = -boost::math::constants::pi() / result; + if(result == 0) + return policies::raise_underflow_error(function, "Result of tgamma is too small to represent.", pol); + if((boost::math::fpclassify)(result) == (int)FP_SUBNORMAL) + return policies::raise_denorm_error(function, "Result of tgamma is denormalized.", result, pol); + BOOST_MATH_INSTRUMENT_VARIABLE(result); + return result; + } + // shift z to > 1: + while(z < 0) + { + result /= z; + z += 1; + } + } + return result * gamma_positive_imp(z, pol, l, function); +} // // lgamma(z) with Lanczos support: // template -T lgamma_imp(T z, const Policy& pol, const Lanczos& l, int* sign = 0) +BOOST_GPU_ENABLED T lgamma_imp(T z, const Policy& pol, const Lanczos& l, int* sign = 0) { #ifdef BOOST_MATH_INSTRUMENT - static bool b = false; + BOOST_MATH_GPU_STATIC bool b = false; if(!b) { 
std::cout << "lgamma_imp called with " << typeid(z).name() << " " << typeid(l).name() << std::endl; @@ -215,7 +224,7 @@ T lgamma_imp(T z, const Policy& pol, const Lanczos& l, int* sign = 0) BOOST_MATH_STD_USING - static const char* function = "boost::math::lgamma<%1%>(%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::lgamma<%1%>(%1%)"; T result = 0; int sresult = 1; @@ -295,14 +304,14 @@ struct upper_incomplete_gamma_fract T z, a; int k; public: - typedef std::pair result_type; + typedef boost::math::pair result_type; - upper_incomplete_gamma_fract(T a1, T z1) + BOOST_GPU_ENABLED upper_incomplete_gamma_fract(T a1, T z1) : z(z1-a1+1), a(a1), k(0) { } - result_type operator()() + BOOST_GPU_ENABLED result_type operator()() { ++k; z += 2; @@ -311,7 +320,7 @@ struct upper_incomplete_gamma_fract }; template -inline T upper_gamma_fraction(T a, T z, T eps) +inline BOOST_GPU_ENABLED T upper_gamma_fraction(T a, T z, T eps) { // Multiply result by z^a * e^-z to get the full // upper incomplete integral. Divide by tgamma(z) @@ -327,9 +336,9 @@ struct lower_incomplete_gamma_series T a, z, result; public: typedef T result_type; - lower_incomplete_gamma_series(T a1, T z1) : a(a1), z(z1), result(1){} + BOOST_GPU_ENABLED lower_incomplete_gamma_series(T a1, T z1) : a(a1), z(z1), result(1){} - T operator()() + BOOST_GPU_ENABLED T operator()() { T r = result; a += 1; @@ -339,7 +348,7 @@ struct lower_incomplete_gamma_series }; template -inline T lower_gamma_series(T a, T z, const Policy& pol, T init_value = 0) +inline BOOST_GPU_ENABLED T lower_gamma_series(T a, T z, const Policy& pol, T init_value = 0) { // Multiply result by ((z^a) * (e^-z) / a) to get the full // lower incomplete integral. Then divide by tgamma(a) @@ -357,7 +366,7 @@ inline T lower_gamma_series(T a, T z, const Policy& pol, T init_value = 0) // with Bernoulli numbers. 
// template -std::size_t highest_bernoulli_index() +BOOST_GPU_ENABLED std::size_t highest_bernoulli_index() { const float digits10_of_type = (std::numeric_limits::is_specialized ? static_cast(std::numeric_limits::digits10) @@ -368,7 +377,7 @@ std::size_t highest_bernoulli_index() } template -T minimum_argument_for_bernoulli_recursion() +BOOST_GPU_ENABLED T minimum_argument_for_bernoulli_recursion() { const float digits10_of_type = (std::numeric_limits::is_specialized ? static_cast(std::numeric_limits::digits10) @@ -379,14 +388,14 @@ T minimum_argument_for_bernoulli_recursion() // Forward declaration of the lgamma_imp template specialization. template -T lgamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&, int* sign = 0); +BOOST_GPU_ENABLED T lgamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&, int* sign = 0); template -T gamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&) +BOOST_GPU_ENABLED T gamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&) { BOOST_MATH_STD_USING - static const char* function = "boost::math::tgamma<%1%>(%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::tgamma<%1%>(%1%)"; // Check if the argument of tgamma is identically zero. const bool is_at_zero = (z == 0); @@ -527,11 +536,11 @@ inline T log_gamma_near_1(const T& z, Policy const& pol) } template -T lgamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&, int* sign) +BOOST_GPU_ENABLED T lgamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&, int* sign) { BOOST_MATH_STD_USING - static const char* function = "boost::math::lgamma<%1%>(%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::lgamma<%1%>(%1%)"; // Check if the argument of lgamma is identically zero. 
const bool is_at_zero = (z == 0); @@ -666,7 +675,7 @@ T lgamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&, int* sig // used by the upper incomplete gamma with z < 1: // template -T tgammap1m1_imp(T dz, Policy const& pol, const Lanczos& l) +BOOST_GPU_ENABLED T tgammap1m1_imp(T dz, Policy const& pol, const Lanczos& l) { BOOST_MATH_STD_USING @@ -724,8 +733,8 @@ T tgammap1m1_imp(T dz, Policy const& pol, const Lanczos& l) } template -inline T tgammap1m1_imp(T z, Policy const& pol, - const ::boost::math::lanczos::undefined_lanczos&) +inline BOOST_GPU_ENABLED T tgammap1m1_imp(T z, Policy const& pol, + const ::boost::math::lanczos::undefined_lanczos& l) { BOOST_MATH_STD_USING // ADL of std names @@ -744,9 +753,9 @@ struct small_gamma2_series { typedef T result_type; - small_gamma2_series(T a_, T x_) : result(-x_), x(-x_), apn(a_+1), n(1){} + BOOST_GPU_ENABLED small_gamma2_series(T a_, T x_) : result(-x_), x(-x_), apn(a_+1), n(1){} - T operator()() + BOOST_GPU_ENABLED T operator()() { T r = result / (apn); result *= x; @@ -764,7 +773,7 @@ struct small_gamma2_series // incomplete gammas: // template -T full_igamma_prefix(T a, T z, const Policy& pol) +BOOST_GPU_ENABLED T full_igamma_prefix(T a, T z, const Policy& pol) { BOOST_MATH_STD_USING @@ -815,7 +824,7 @@ T full_igamma_prefix(T a, T z, const Policy& pol) // most if the error occurs in this function: // template -T regularised_gamma_prefix(T a, T z, const Policy& pol, const Lanczos& l) +BOOST_GPU_ENABLED T regularised_gamma_prefix(T a, T z, const Policy& pol, const Lanczos& l) { BOOST_MATH_STD_USING T agh = a + static_cast(Lanczos::g()) - T(0.5); @@ -836,7 +845,13 @@ T regularised_gamma_prefix(T a, T z, const Policy& pol, const Lanczos& l) if(z <= tools::log_min_value()) { // Oh dear, have to use logs, should be free of cancellation errors though: +#ifdef __CUDA_ARCH__ + // Even though boost::math::lgamma is definitely not recursive, using it here + // causes CUDA to think that this function is :( + 
return exp(a * log(z) - z - std::lgamma(a)); +#else return exp(a * log(z) - z - lgamma_imp(a, pol, l)); +#endif } else { @@ -860,16 +875,16 @@ T regularised_gamma_prefix(T a, T z, const Policy& pol, const Lanczos& l) // T alz = a * log(z / agh); T amz = a - z; - if(((std::min)(alz, amz) <= tools::log_min_value()) || ((std::max)(alz, amz) >= tools::log_max_value())) + if((BOOST_MATH_CUDA_SAFE_MIN(alz, amz) <= tools::log_min_value()) || (BOOST_MATH_CUDA_SAFE_MAX(alz, amz) >= tools::log_max_value())) { T amza = amz / a; - if(((std::min)(alz, amz)/2 > tools::log_min_value()) && ((std::max)(alz, amz)/2 < tools::log_max_value())) + if((BOOST_MATH_CUDA_SAFE_MIN(alz, amz)/2 > tools::log_min_value()) && (BOOST_MATH_CUDA_SAFE_MAX(alz, amz)/2 < tools::log_max_value())) { // compute square root of the result and then square it: T sq = pow(z / agh, a / 2) * exp(amz / 2); prefix = sq * sq; } - else if(((std::min)(alz, amz)/4 > tools::log_min_value()) && ((std::max)(alz, amz)/4 < tools::log_max_value()) && (z > a)) + else if((BOOST_MATH_CUDA_SAFE_MIN(alz, amz)/4 > tools::log_min_value()) && (BOOST_MATH_CUDA_SAFE_MAX(alz, amz)/4 < tools::log_max_value()) && (z > a)) { // compute the 4th root of the result then square it twice: T sq = pow(z / agh, a / 4) * exp(amz / 4); @@ -897,11 +912,11 @@ T regularised_gamma_prefix(T a, T z, const Policy& pol, const Lanczos& l) // And again, without Lanczos support: // template -T regularised_gamma_prefix(T a, T z, const Policy& pol, const lanczos::undefined_lanczos&) +BOOST_GPU_ENABLED T regularised_gamma_prefix(T a, T z, const Policy& pol, const lanczos::undefined_lanczos&) { BOOST_MATH_STD_USING - T limit = (std::max)(T(10), a); + T limit = BOOST_MATH_CUDA_SAFE_MAX(T(10), a); T sum = detail::lower_gamma_series(a, limit, pol) / a; sum += detail::upper_gamma_fraction(a, limit, ::boost::math::policies::get_epsilon()); @@ -922,7 +937,7 @@ T regularised_gamma_prefix(T a, T z, const Policy& pol, const lanczos::undefined T amz = a - z; T alzoa = a * 
log(zoa); T prefix; - if(((std::min)(alzoa, amz) <= tools::log_min_value()) || ((std::max)(alzoa, amz) >= tools::log_max_value())) + if((BOOST_MATH_CUDA_SAFE_MIN(alzoa, amz) <= tools::log_min_value()) || (BOOST_MATH_CUDA_SAFE_MAX(alzoa, amz) >= tools::log_max_value())) { T amza = amz / a; if((amza <= tools::log_min_value()) || (amza >= tools::log_max_value())) @@ -945,7 +960,7 @@ T regularised_gamma_prefix(T a, T z, const Policy& pol, const lanczos::undefined // Upper gamma fraction for very small a: // template -inline T tgamma_small_upper_part(T a, T x, const Policy& pol, T* pgam = 0, bool invert = false, T* pderivative = 0) +BOOST_GPU_ENABLED inline T tgamma_small_upper_part(T a, T x, const Policy& pol, T* pgam = 0, bool invert = false, T* pderivative = 0) { BOOST_MATH_STD_USING // ADL of std functions. // @@ -974,7 +989,7 @@ inline T tgamma_small_upper_part(T a, T x, const Policy& pol, T* pgam = 0, bool // Upper gamma fraction for integer a: // template -inline T finite_gamma_q(T a, T x, Policy const& pol, T* pderivative = 0) +BOOST_GPU_ENABLED inline T finite_gamma_q(T a, T x, Policy const& pol, T* pderivative = 0) { // // Calculates normalised Q when a is an integer: @@ -1002,7 +1017,7 @@ inline T finite_gamma_q(T a, T x, Policy const& pol, T* pderivative = 0) // Upper gamma fraction for half integer a: // template -T finite_half_gamma_q(T a, T x, T* p_derivative, const Policy& pol) +BOOST_GPU_ENABLED T finite_half_gamma_q(T a, T x, T* p_derivative, const Policy& pol) { // // Calculates normalised Q when a is a half-integer: @@ -1013,7 +1028,7 @@ T finite_half_gamma_q(T a, T x, T* p_derivative, const Policy& pol) { T term = exp(-x) / sqrt(constants::pi() * x); term *= x; - static const T half = T(1) / 2; + BOOST_MATH_GPU_STATIC const T half = T(1) / 2; term /= half; T sum = term; for(unsigned n = 2; n < a; ++n) @@ -1039,10 +1054,10 @@ T finite_half_gamma_q(T a, T x, T* p_derivative, const Policy& pol) // Main incomplete gamma entry point, handles all four 
incomplete gamma's: // template -T gamma_incomplete_imp(T a, T x, bool normalised, bool invert, +BOOST_GPU_ENABLED T gamma_incomplete_imp(T a, T x, bool normalised, bool invert, const Policy& pol, T* p_derivative) { - static const char* function = "boost::math::gamma_p<%1%>(%1%, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::gamma_p<%1%>(%1%, %1%)"; if(a <= 0) return policies::raise_domain_error(function, "Argument a to the incomplete gamma function must be greater than zero (got a=%1%).", a, pol); if(x < 0) @@ -1054,6 +1069,7 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert, T result = 0; // Just to avoid warning C4701: potentially uninitialized local variable 'result' used +#ifndef __CUDA_ARCH__ // this branch causes recursion on CUDA if(a >= max_factorial::value && !normalised) { // @@ -1117,8 +1133,9 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert, return policies::raise_overflow_error(function, 0, pol); return exp(result); } +#endif // CUDA - BOOST_ASSERT((p_derivative == 0) || (normalised == true)); + BOOST_MATH_ASSERT((p_derivative == 0) || (normalised == true)); bool is_int, is_half_int; bool is_small_a = (a < 30) && (a <= x + 1) && (x < tools::log_max_value()); @@ -1396,7 +1413,7 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert, // Ratios of two gamma functions: // template -T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const Lanczos& l) +BOOST_GPU_ENABLED T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const Lanczos& l) { BOOST_MATH_STD_USING if(z < tools::epsilon()) @@ -1451,7 +1468,7 @@ T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const Lanczos& // And again without Lanczos support this time: // template -T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const lanczos::undefined_lanczos&) +BOOST_GPU_ENABLED T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const lanczos::undefined_lanczos&) { 
BOOST_MATH_STD_USING // @@ -1488,7 +1505,7 @@ T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const lanczos: } template -T tgamma_delta_ratio_imp(T z, T delta, const Policy& pol) +BOOST_GPU_ENABLED T tgamma_delta_ratio_imp(T z, T delta, const Policy& pol) { BOOST_MATH_STD_USING @@ -1546,7 +1563,7 @@ T tgamma_delta_ratio_imp(T z, T delta, const Policy& pol) } template -T tgamma_ratio_imp(T x, T y, const Policy& pol) +BOOST_GPU_ENABLED T tgamma_ratio_imp(T x, T y, const Policy& pol) { BOOST_MATH_STD_USING @@ -1615,7 +1632,7 @@ T tgamma_ratio_imp(T x, T y, const Policy& pol) } template -T gamma_p_derivative_imp(T a, T x, const Policy& pol) +BOOST_GPU_ENABLED T gamma_p_derivative_imp(T a, T x, const Policy& pol) { BOOST_MATH_STD_USING // @@ -1656,7 +1673,7 @@ T gamma_p_derivative_imp(T a, T x, const Policy& pol) } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type tgamma(T z, const Policy& /* pol */, const mpl::true_) { BOOST_FPU_EXCEPTION_GUARD @@ -1714,9 +1731,11 @@ struct igamma_initializer void force_instantiate()const{} }; static const init initializer; - static void force_instantiate() + static BOOST_GPU_ENABLED void force_instantiate() { +#ifndef __CUDA_ARCH__ initializer.force_instantiate(); +#endif } }; @@ -1765,9 +1784,11 @@ struct lgamma_initializer void force_instantiate()const{} }; static const init initializer; - static void force_instantiate() + static BOOST_GPU_ENABLED void force_instantiate() { +#ifndef __CUDA_ARCH__ initializer.force_instantiate(); +#endif } }; @@ -1775,7 +1796,7 @@ template const typename lgamma_initializer::init lgamma_initializer::initializer; template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type tgamma(T1 a, T2 z, const Policy&, const mpl::false_) { BOOST_FPU_EXCEPTION_GUARD @@ -1798,7 +1819,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type 
+inline BOOST_GPU_ENABLED typename tools::promote_args::type tgamma(T1 a, T2 z, const mpl::false_ tag) { return tgamma(a, z, policies::policy<>(), tag); @@ -1808,14 +1829,14 @@ inline typename tools::promote_args::type } // namespace detail template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type tgamma(T z) { return tgamma(z, policies::policy<>()); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type lgamma(T z, int* sign, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1835,28 +1856,28 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type lgamma(T z, int* sign) { return lgamma(z, sign, policies::policy<>()); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type lgamma(T x, const Policy& pol) { return ::boost::math::lgamma(x, 0, pol); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type lgamma(T x) { return ::boost::math::lgamma(x, 0, policies::policy<>()); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type tgamma1pm1(T z, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD @@ -1874,7 +1895,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type tgamma1pm1(T z) { return tgamma1pm1(z, policies::policy<>()); @@ -1884,7 +1905,7 @@ inline typename tools::promote_args::type // Full upper incomplete gamma: // template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type tgamma(T1 a, T2 z) { // @@ -1895,7 +1916,7 @@ inline typename tools::promote_args::type return detail::tgamma(a, z, maybe_policy()); } template 
-inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type tgamma(T1 a, T2 z, const Policy& pol) { return detail::tgamma(a, z, pol, mpl::false_()); @@ -1904,7 +1925,7 @@ inline typename tools::promote_args::type // Full lower incomplete gamma: // template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type tgamma_lower(T1 a, T2 z, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1926,7 +1947,7 @@ inline typename tools::promote_args::type forwarding_policy(), static_cast(0)), "tgamma_lower<%1%>(%1%, %1%)"); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type tgamma_lower(T1 a, T2 z) { return tgamma_lower(a, z, policies::policy<>()); @@ -1935,7 +1956,7 @@ inline typename tools::promote_args::type // Regularised upper incomplete gamma: // template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type gamma_q(T1 a, T2 z, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD @@ -1957,7 +1978,7 @@ inline typename tools::promote_args::type forwarding_policy(), static_cast(0)), "gamma_q<%1%>(%1%, %1%)"); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type gamma_q(T1 a, T2 z) { return gamma_q(a, z, policies::policy<>()); @@ -1966,7 +1987,7 @@ inline typename tools::promote_args::type // Regularised lower incomplete gamma: // template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type gamma_p(T1 a, T2 z, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1988,7 +2009,7 @@ inline typename tools::promote_args::type forwarding_policy(), static_cast(0)), "gamma_p<%1%>(%1%, %1%)"); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type gamma_p(T1 a, T2 z) { return gamma_p(a, z, policies::policy<>()); @@ 
-1996,7 +2017,7 @@ inline typename tools::promote_args::type // ratios of gamma functions: template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type tgamma_delta_ratio(T1 z, T2 delta, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD @@ -2012,13 +2033,13 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::tgamma_delta_ratio_imp(static_cast(z), static_cast(delta), forwarding_policy()), "boost::math::tgamma_delta_ratio<%1%>(%1%, %1%)"); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type tgamma_delta_ratio(T1 z, T2 delta) { return tgamma_delta_ratio(z, delta, policies::policy<>()); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type tgamma_ratio(T1 a, T2 b, const Policy&) { typedef typename tools::promote_args::type result_type; @@ -2033,14 +2054,14 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::tgamma_ratio_imp(static_cast(a), static_cast(b), forwarding_policy()), "boost::math::tgamma_delta_ratio<%1%>(%1%, %1%)"); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type tgamma_ratio(T1 a, T2 b) { return tgamma_ratio(a, b, policies::policy<>()); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type gamma_p_derivative(T1 a, T2 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -2056,7 +2077,7 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::gamma_p_derivative_imp(static_cast(a), static_cast(x), forwarding_policy()), "boost::math::gamma_p_derivative<%1%>(%1%, %1%)"); } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type gamma_p_derivative(T1 a, T2 x) { return gamma_p_derivative(a, x, 
policies::policy<>()); diff --git a/include/boost/math/special_functions/heuman_lambda.hpp b/include/boost/math/special_functions/heuman_lambda.hpp index 6389443ee4..383d2e5275 100644 --- a/include/boost/math/special_functions/heuman_lambda.hpp +++ b/include/boost/math/special_functions/heuman_lambda.hpp @@ -27,13 +27,13 @@ namespace detail{ // Elliptic integral - Jacobi Zeta template -T heuman_lambda_imp(T phi, T k, const Policy& pol) +BOOST_GPU_ENABLED T heuman_lambda_imp(T phi, T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; using namespace boost::math::constants; - const char* function = "boost::math::heuman_lambda<%1%>(%1%, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::heuman_lambda<%1%>(%1%, %1%)"; if(fabs(k) > 1) return policies::raise_domain_error(function, "We require |k| <= 1 but got k = %1%", k, pol); @@ -68,7 +68,7 @@ T heuman_lambda_imp(T phi, T k, const Policy& pol) } // detail template -inline typename tools::promote_args::type heuman_lambda(T1 k, T2 phi, const Policy& pol) +inline BOOST_GPU_ENABLED typename tools::promote_args::type heuman_lambda(T1 k, T2 phi, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -76,7 +76,7 @@ inline typename tools::promote_args::type heuman_lambda(T1 k, T2 phi, co } template -inline typename tools::promote_args::type heuman_lambda(T1 k, T2 phi) +inline BOOST_GPU_ENABLED typename tools::promote_args::type heuman_lambda(T1 k, T2 phi) { return boost::math::heuman_lambda(k, phi, policies::policy<>()); } diff --git a/include/boost/math/special_functions/jacobi_zeta.hpp b/include/boost/math/special_functions/jacobi_zeta.hpp index a3fa54746e..50837c3ac7 100644 --- a/include/boost/math/special_functions/jacobi_zeta.hpp +++ b/include/boost/math/special_functions/jacobi_zeta.hpp @@ -26,7 +26,7 @@ namespace detail{ // Elliptic integral - Jacobi Zeta template -T jacobi_zeta_imp(T phi, T k, 
const Policy& pol) +BOOST_GPU_ENABLED T jacobi_zeta_imp(T phi, T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -55,7 +55,7 @@ T jacobi_zeta_imp(T phi, T k, const Policy& pol) } // detail template -inline typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi, const Policy& pol) +inline BOOST_GPU_ENABLED typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -63,7 +63,7 @@ inline typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi, cons } template -inline typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi) +inline BOOST_GPU_ENABLED typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi) { return boost::math::jacobi_zeta(k, phi, policies::policy<>()); } diff --git a/include/boost/math/special_functions/lanczos.hpp b/include/boost/math/special_functions/lanczos.hpp index c1ff86930b..30a873677b 100644 --- a/include/boost/math/special_functions/lanczos.hpp +++ b/include/boost/math/special_functions/lanczos.hpp @@ -74,10 +74,10 @@ struct lanczos6 : public mpl::int_<35> // double precision: // template - static T lanczos_sum(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum(const T& z) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T num[6] = { + BOOST_MATH_GPU_STATIC const T num[6] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 8706.349592549009182288174442774377925882)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 8523.650341121874633477483696775067709735)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 3338.029219476423550899999750161289306564)), @@ -85,7 +85,7 @@ struct lanczos6 : public mpl::int_<35> static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 63.99951844938187085666201263218840287667)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 2.506628274631006311133031631822390264407)) }; - static const 
BOOST_MATH_INT_TABLE_TYPE(T, boost::uint16_t) denom[6] = { + BOOST_MATH_GPU_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint16_t) denom[6] = { static_cast(0u), static_cast(24u), static_cast(50u), @@ -97,10 +97,10 @@ struct lanczos6 : public mpl::int_<35> } template - static T lanczos_sum_expG_scaled(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum_expG_scaled(const T& z) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T num[6] = { + BOOST_MATH_GPU_STATIC const T num[6] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 32.81244541029783471623665933780748627823)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 32.12388941444332003446077108933558534361)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 12.58034729455216106950851080138931470954)), @@ -108,7 +108,7 @@ struct lanczos6 : public mpl::int_<35> static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 0.2412010548258800231126240760264822486599)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 0.009446967704539249494420221613134244048319)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint16_t) denom[6] = { + BOOST_MATH_GPU_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint16_t) denom[6] = { static_cast(0u), static_cast(24u), static_cast(50u), @@ -121,10 +121,10 @@ struct lanczos6 : public mpl::int_<35> template - static T lanczos_sum_near_1(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_1(const T& dz) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T d[5] = { + BOOST_MATH_GPU_STATIC const T d[5] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 2.044879010930422922760429926121241330235)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, -2.751366405578505366591317846728753993668)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 1.02282965224225004296750609604264824677)), @@ -140,10 +140,10 @@ struct lanczos6 : public mpl::int_<35> } template - static T lanczos_sum_near_2(const T& dz) + static 
BOOST_GPU_ENABLED T lanczos_sum_near_2(const T& dz) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T d[5] = { + BOOST_MATH_GPU_STATIC const T d[5] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 5.748142489536043490764289256167080091892)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, -7.734074268282457156081021756682138251825)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 2.875167944990511006997713242805893543947)), @@ -159,7 +159,7 @@ struct lanczos6 : public mpl::int_<35> return result; } - static double g(){ return 5.581000000000000405009359383257105946541; } + static BOOST_GPU_ENABLED double g(){ return 5.581000000000000405009359383257105946541; } }; // @@ -174,10 +174,10 @@ struct lanczos11 : public mpl::int_<60> // extended-double precision: // template - static T lanczos_sum(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum(const T& z) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T num[11] = { + BOOST_MATH_GPU_STATIC const T num[11] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 38474670393.31776828316099004518914832218)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 36857665043.51950660081971227404959150474)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 15889202453.72942008945006665994637853242)), @@ -190,7 +190,7 @@ struct lanczos11 : public mpl::int_<60> static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 261.6140441641668190791708576058805625502)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 2.506628274631000502415573855452633787834)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint32_t) denom[11] = { + BOOST_MATH_GPU_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint32_t) denom[11] = { static_cast(0u), static_cast(362880u), static_cast(1026576u), @@ -207,10 +207,10 @@ struct lanczos11 : public mpl::int_<60> } template - static T lanczos_sum_expG_scaled(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum_expG_scaled(const 
T& z) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T num[11] = { + BOOST_MATH_GPU_STATIC const T num[11] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 709811.662581657956893540610814842699825)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 679979.847415722640161734319823103390728)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 293136.785721159725251629480984140341656)), @@ -223,7 +223,7 @@ struct lanczos11 : public mpl::int_<60> static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 0.004826466289237661857584712046231435101741)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 0.4624429436045378766270459638520555557321e-4)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint32_t) denom[11] = { + BOOST_MATH_GPU_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint32_t) denom[11] = { static_cast(0u), static_cast(362880u), static_cast(1026576u), @@ -241,10 +241,10 @@ struct lanczos11 : public mpl::int_<60> template - static T lanczos_sum_near_1(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_1(const T& dz) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T d[10] = { + BOOST_MATH_GPU_STATIC const T d[10] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 4.005853070677940377969080796551266387954)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, -13.17044315127646469834125159673527183164)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 17.19146865350790353683895137079288129318)), @@ -265,10 +265,10 @@ struct lanczos11 : public mpl::int_<60> } template - static T lanczos_sum_near_2(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_2(const T& dz) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T d[10] = { + BOOST_MATH_GPU_STATIC const T d[10] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 19.05889633808148715159575716844556056056)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 
-62.66183664701721716960978577959655644762)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 81.7929198065004751699057192860287512027)), @@ -289,7 +289,7 @@ struct lanczos11 : public mpl::int_<60> return result; } - static double g(){ return 10.90051099999999983936049829935654997826; } + static BOOST_GPU_ENABLED double g(){ return 10.90051099999999983936049829935654997826; } }; // @@ -304,10 +304,10 @@ struct lanczos13 : public mpl::int_<72> // higher precision: // template - static T lanczos_sum(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum(const T& z) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T num[13] = { + BOOST_MATH_GPU_STATIC const T num[13] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 44012138428004.60895436261759919070125699)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 41590453358593.20051581730723108131357995)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 18013842787117.99677796276038389462742949)), @@ -322,7 +322,7 @@ struct lanczos13 : public mpl::int_<72> static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 381.8801248632926870394389468349331394196)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 2.506628274631000502415763426076722427007)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint32_t) denom[13] = { + BOOST_MATH_GPU_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint32_t) denom[13] = { static_cast(0u), static_cast(39916800u), static_cast(120543840u), @@ -341,10 +341,10 @@ struct lanczos13 : public mpl::int_<72> } template - static T lanczos_sum_expG_scaled(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum_expG_scaled(const T& z) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T num[13] = { + BOOST_MATH_GPU_STATIC const T num[13] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 86091529.53418537217994842267760536134841)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 81354505.17858011242874285785316135398567)), 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 35236626.38815461910817650960734605416521)), @@ -359,7 +359,7 @@ struct lanczos13 : public mpl::int_<72> static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 0.0007469903808915448316510079585999893674101)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 0.4903180573459871862552197089738373164184e-5)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint32_t) denom[13] = { + BOOST_MATH_GPU_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint32_t) denom[13] = { static_cast(0u), static_cast(39916800u), static_cast(120543840u), @@ -379,10 +379,10 @@ struct lanczos13 : public mpl::int_<72> template - static T lanczos_sum_near_1(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_1(const T& dz) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T d[12] = { + BOOST_MATH_GPU_STATIC const T d[12] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 4.832115561461656947793029596285626840312)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, -19.86441536140337740383120735104359034688)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 33.9927422807443239927197864963170585331)), @@ -405,10 +405,10 @@ struct lanczos13 : public mpl::int_<72> } template - static T lanczos_sum_near_2(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_2(const T& dz) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T d[12] = { + BOOST_MATH_GPU_STATIC const T d[12] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 26.96979819614830698367887026728396466395)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, -110.8705424709385114023884328797900204863)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 189.7258846119231466417015694690434770085)), @@ -431,7 +431,7 @@ struct lanczos13 : public mpl::int_<72> return result; } - static double g(){ return 13.1445650000000000545696821063756942749; } + static BOOST_GPU_ENABLED double g(){ return 
13.1445650000000000545696821063756942749; } }; // @@ -446,10 +446,10 @@ struct lanczos22 : public mpl::int_<120> // evaluated at higher precision: // template - static T lanczos_sum(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum(const T& z) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T num[22] = { + BOOST_MATH_GPU_STATIC const T num[22] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, 46198410803245094237463011094.12173081986)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, 43735859291852324413622037436.321513777)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, 19716607234435171720534556386.97481377748)), @@ -473,7 +473,7 @@ struct lanczos22 : public mpl::int_<120> static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, 1167.501919472435718934219997431551246996)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, 2.50662827463100050241576528481104525333)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint64_t) denom[22] = { + BOOST_MATH_GPU_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint64_t) denom[22] = { BOOST_MATH_INT_VALUE_SUFFIX(0, uLL), BOOST_MATH_INT_VALUE_SUFFIX(2432902008176640000, uLL), BOOST_MATH_INT_VALUE_SUFFIX(8752948036761600000, uLL), @@ -501,10 +501,10 @@ struct lanczos22 : public mpl::int_<120> } template - static T lanczos_sum_expG_scaled(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum_expG_scaled(const T& z) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T num[22] = { + BOOST_MATH_GPU_STATIC const T num[22] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, 6939996264376682180.277485395074954356211)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, 6570067992110214451.87201438870245659384)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, 2961859037444440551.986724631496417064121)), @@ -528,7 +528,7 @@ struct lanczos22 : public mpl::int_<120> static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, 
0.1753839324538447655939518484052327068859e-6)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, 0.3765495513732730583386223384116545391759e-9)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint64_t) denom[22] = { + BOOST_MATH_GPU_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint64_t) denom[22] = { BOOST_MATH_INT_VALUE_SUFFIX(0, uLL), BOOST_MATH_INT_VALUE_SUFFIX(2432902008176640000, uLL), BOOST_MATH_INT_VALUE_SUFFIX(8752948036761600000, uLL), @@ -557,10 +557,10 @@ struct lanczos22 : public mpl::int_<120> template - static T lanczos_sum_near_1(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_1(const T& dz) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T d[21] = { + BOOST_MATH_GPU_STATIC const T d[21] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, 8.318998691953337183034781139546384476554)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, -63.15415991415959158214140353299240638675)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, 217.3108224383632868591462242669081540163)), @@ -592,10 +592,10 @@ struct lanczos22 : public mpl::int_<120> } template - static T lanczos_sum_near_2(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_2(const T& dz) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T d[21] = { + BOOST_MATH_GPU_STATIC const T d[21] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, 75.39272007105208086018421070699575462226)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, -572.3481967049935412452681346759966390319)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 120, 1969.426202741555335078065370698955484358)), @@ -627,7 +627,7 @@ struct lanczos22 : public mpl::int_<120> return result; } - static double g(){ return 22.61890999999999962710717227309942245483; } + static BOOST_GPU_ENABLED double g(){ return 22.61890999999999962710717227309942245483; } }; // @@ -641,9 +641,9 @@ struct lanczos6m24 : public mpl::int_<24> // Use 
for float precision, when evaluated as a float: // template - static T lanczos_sum(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum(const T& z) { - static const T num[6] = { + BOOST_MATH_GPU_STATIC const T num[6] = { static_cast(58.52061591769095910314047740215847630266L), static_cast(182.5248962595894264831189414768236280862L), static_cast(211.0971093028510041839168287718170827259L), @@ -651,7 +651,7 @@ struct lanczos6m24 : public mpl::int_<24> static_cast(27.5192015197455403062503721613097825345L), static_cast(2.50662858515256974113978724717473206342L) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint16_t) denom[6] = { + BOOST_MATH_GPU_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint16_t) denom[6] = { static_cast(0u), static_cast(24u), static_cast(50u), @@ -663,9 +663,9 @@ struct lanczos6m24 : public mpl::int_<24> } template - static T lanczos_sum_expG_scaled(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum_expG_scaled(const T& z) { - static const T num[6] = { + BOOST_MATH_GPU_STATIC const T num[6] = { static_cast(14.0261432874996476619570577285003839357L), static_cast(43.74732405540314316089531289293124360129L), static_cast(50.59547402616588964511581430025589038612L), @@ -673,7 +673,7 @@ struct lanczos6m24 : public mpl::int_<24> static_cast(6.595765571169314946316366571954421695196L), static_cast(0.6007854010515290065101128585795542383721L) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint16_t) denom[6] = { + BOOST_MATH_GPU_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint16_t) denom[6] = { static_cast(0u), static_cast(24u), static_cast(50u), @@ -686,9 +686,9 @@ struct lanczos6m24 : public mpl::int_<24> template - static T lanczos_sum_near_1(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_1(const T& dz) { - static const T d[5] = { + BOOST_MATH_GPU_STATIC const T d[5] = { static_cast(0.4922488055204602807654354732674868442106L), static_cast(0.004954497451132152436631238060933905650346L), 
static_cast(-0.003374784572167105840686977985330859371848L), @@ -704,9 +704,9 @@ struct lanczos6m24 : public mpl::int_<24> } template - static T lanczos_sum_near_2(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_2(const T& dz) { - static const T d[5] = { + BOOST_MATH_GPU_STATIC const T d[5] = { static_cast(0.6534966888520080645505805298901130485464L), static_cast(0.006577461728560758362509168026049182707101L), static_cast(-0.004480276069269967207178373559014835978161L), @@ -722,7 +722,7 @@ struct lanczos6m24 : public mpl::int_<24> return result; } - static double g(){ return 1.428456135094165802001953125; } + static BOOST_GPU_ENABLED double g(){ return 1.428456135094165802001953125; } }; // @@ -736,9 +736,9 @@ struct lanczos13m53 : public mpl::int_<53> // Use for double precision, when evaluated as a double: // template - static T lanczos_sum(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum(const T& z) { - static const T num[13] = { + BOOST_MATH_GPU_STATIC const T num[13] = { static_cast(23531376880.41075968857200767445163675473L), static_cast(42919803642.64909876895789904700198885093L), static_cast(35711959237.35566804944018545154716670596L), @@ -753,7 +753,7 @@ struct lanczos13m53 : public mpl::int_<53> static_cast(210.8242777515793458725097339207133627117L), static_cast(2.506628274631000270164908177133837338626L) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint32_t) denom[13] = { + BOOST_MATH_GPU_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint32_t) denom[13] = { static_cast(0u), static_cast(39916800u), static_cast(120543840u), @@ -772,9 +772,9 @@ struct lanczos13m53 : public mpl::int_<53> } template - static T lanczos_sum_expG_scaled(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum_expG_scaled(const T& z) { - static const T num[13] = { + BOOST_MATH_GPU_STATIC const T num[13] = { static_cast(56906521.91347156388090791033559122686859L), static_cast(103794043.1163445451906271053616070238554L), 
static_cast(86363131.28813859145546927288977868422342L), @@ -789,7 +789,7 @@ struct lanczos13m53 : public mpl::int_<53> static_cast(0.5098416655656676188125178644804694509993L), static_cast(0.006061842346248906525783753964555936883222L) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint32_t) denom[13] = { + BOOST_MATH_GPU_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint32_t) denom[13] = { static_cast(0u), static_cast(39916800u), static_cast(120543840u), @@ -809,9 +809,9 @@ struct lanczos13m53 : public mpl::int_<53> template - static T lanczos_sum_near_1(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_1(const T& dz) { - static const T d[12] = { + BOOST_MATH_GPU_STATIC const T d[12] = { static_cast(2.208709979316623790862569924861841433016L), static_cast(-3.327150580651624233553677113928873034916L), static_cast(1.483082862367253753040442933770164111678L), @@ -834,9 +834,9 @@ struct lanczos13m53 : public mpl::int_<53> } template - static T lanczos_sum_near_2(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_2(const T& dz) { - static const T d[12] = { + BOOST_MATH_GPU_STATIC const T d[12] = { static_cast(6.565936202082889535528455955485877361223L), static_cast(-9.8907772644920670589288081640128194231L), static_cast(4.408830289125943377923077727900630927902L), @@ -859,7 +859,7 @@ struct lanczos13m53 : public mpl::int_<53> return result; } - static double g(){ return 6.024680040776729583740234375; } + static BOOST_GPU_ENABLED double g(){ return 6.024680040776729583740234375; } }; // @@ -873,10 +873,10 @@ struct lanczos17m64 : public mpl::int_<64> // Use for extended-double precision, when evaluated as an extended-double: // template - static T lanczos_sum(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum(const T& z) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T num[17] = { + BOOST_MATH_GPU_STATIC const T num[17] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 
553681095419291969.2230556393350368550504)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 731918863887667017.2511276782146694632234)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 453393234285807339.4627124634539085143364)), @@ -895,7 +895,7 @@ struct lanczos17m64 : public mpl::int_<64> static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 488.0063567520005730476791712814838113252)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.50662827463100050241576877135758834683)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint64_t) denom[17] = { + BOOST_MATH_GPU_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint64_t) denom[17] = { BOOST_MATH_INT_VALUE_SUFFIX(0, uLL), BOOST_MATH_INT_VALUE_SUFFIX(1307674368000, uLL), BOOST_MATH_INT_VALUE_SUFFIX(4339163001600, uLL), @@ -918,10 +918,10 @@ struct lanczos17m64 : public mpl::int_<64> } template - static T lanczos_sum_expG_scaled(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum_expG_scaled(const T& z) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T num[17] = { + BOOST_MATH_GPU_STATIC const T num[17] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2715894658327.717377557655133124376674911)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3590179526097.912105038525528721129550434)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2223966599737.814969312127353235818710172)), @@ -940,7 +940,7 @@ struct lanczos17m64 : public mpl::int_<64> static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.002393749522058449186690627996063983095463)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.1229541408909435212800785616808830746135e-4)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint64_t) denom[17] = { + BOOST_MATH_GPU_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::uint64_t) denom[17] = { BOOST_MATH_INT_VALUE_SUFFIX(0, uLL), BOOST_MATH_INT_VALUE_SUFFIX(1307674368000, uLL), BOOST_MATH_INT_VALUE_SUFFIX(4339163001600, uLL), @@ -964,10 +964,10 @@ struct lanczos17m64 : public mpl::int_<64> 
template - static T lanczos_sum_near_1(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_1(const T& dz) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T d[16] = { + BOOST_MATH_GPU_STATIC const T d[16] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.493645054286536365763334986866616581265)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -16.95716370392468543800733966378143997694)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 26.19196892983737527836811770970479846644)), @@ -994,10 +994,10 @@ struct lanczos17m64 : public mpl::int_<64> } template - static T lanczos_sum_near_2(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_2(const T& dz) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T d[16] = { + BOOST_MATH_GPU_STATIC const T d[16] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 23.56409085052261327114594781581930373708)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -88.92116338946308797946237246006238652361)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 137.3472822086847596961177383569603988797)), @@ -1024,7 +1024,7 @@ struct lanczos17m64 : public mpl::int_<64> return result; } - static double g(){ return 12.2252227365970611572265625; } + static BOOST_GPU_ENABLED double g(){ return 12.2252227365970611572265625; } }; // @@ -1038,10 +1038,10 @@ struct lanczos24m113 : public mpl::int_<113> // Use for long-double precision, when evaluated as an long-double: // template - static T lanczos_sum(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum(const T& z) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T num[24] = { + BOOST_MATH_GPU_STATIC const T num[24] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 2029889364934367661624137213253.22102954656825019111612712252027267955023987678816620961507)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 
2338599599286656537526273232565.2727349714338768161421882478417543004440597874814359063158)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 1288527989493833400335117708406.3953711906175960449186720680201425446299360322830739180195)), @@ -1067,7 +1067,7 @@ struct lanczos24m113 : public mpl::int_<113> static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 1151.61895453463992438325318456328526085882924197763140514450975619271382783957699017875304)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 2.50662827463100050241576528481104515966515623051532908941425544355490413900497467936202516)) }; - static const T denom[24] = { + BOOST_MATH_GPU_STATIC const T denom[24] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.0)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.112400072777760768e22)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.414847677933545472e22)), @@ -1097,10 +1097,10 @@ struct lanczos24m113 : public mpl::int_<113> } template - static T lanczos_sum_expG_scaled(const T& z) + static BOOST_GPU_ENABLED T lanczos_sum_expG_scaled(const T& z) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T num[24] = { + BOOST_MATH_GPU_STATIC const T num[24] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 3035162425359883494754.02878223286972654682199012688209026810841953293372712802258398358538)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 3496756894406430103600.16057175075063458536101374170860226963245118484234495645518505519827)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 1926652656689320888654.01954015145958293168365236755537645929361841917596501251362171653478)), @@ -1126,7 +1126,7 @@ struct lanczos24m113 : public mpl::int_<113> static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.172194142179211139195966608011235161516824700287310869949928393345257114743230967204370963e-5)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.374799931707148855771381263542708435935402853962736029347951399323367765509988401336565436e-8)) }; - static const 
T denom[24] = { + BOOST_MATH_GPU_STATIC const T denom[24] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.0)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.112400072777760768e22)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.414847677933545472e22)), @@ -1157,10 +1157,10 @@ struct lanczos24m113 : public mpl::int_<113> template - static T lanczos_sum_near_1(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_1(const T& dz) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T d[23] = { + BOOST_MATH_GPU_STATIC const T d[23] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 7.4734083002469026177867421609938203388868806387315406134072298925733950040583068760685908)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, -50.4225805042247530267317342133388132970816607563062253708655085754357843064134941138154171)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 152.288200621747008570784082624444625293884063492396162110698238568311211546361189979357019)), @@ -1194,10 +1194,10 @@ struct lanczos24m113 : public mpl::int_<113> } template - static T lanczos_sum_near_2(const T& dz) + static BOOST_GPU_ENABLED T lanczos_sum_near_2(const T& dz) { lanczos_initializer::force_instantiate(); // Ensure our constants get initialized before main() - static const T d[23] = { + BOOST_MATH_GPU_STATIC const T d[23] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 61.4165001061101455341808888883960361969557848005400286332291451422461117307237198559485365)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, -414.372973678657049667308134761613915623353625332248315105320470271523320700386200587519147)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 1251.50505818554680171298972755376376836161706773644771875668053742215217922228357204561873)), @@ -1231,7 +1231,7 @@ struct lanczos24m113 : public mpl::int_<113> return result; } - static double g(){ return 20.3209821879863739013671875; } + static BOOST_GPU_ENABLED double g(){ return 
20.3209821879863739013671875; } }; diff --git a/include/boost/math/special_functions/log1p.hpp b/include/boost/math/special_functions/log1p.hpp index 7fa1eb8de9..5a24735fbe 100644 --- a/include/boost/math/special_functions/log1p.hpp +++ b/include/boost/math/special_functions/log1p.hpp @@ -41,16 +41,16 @@ namespace detail { typedef T result_type; - log1p_series(T x) + BOOST_GPU_ENABLED log1p_series(T x) : k(0), m_mult(-x), m_prod(-1){} - T operator()() + BOOST_GPU_ENABLED T operator()() { m_prod *= m_mult; return m_prod / ++k; } - int count()const + BOOST_GPU_ENABLED int count()const { return k; } @@ -73,12 +73,12 @@ namespace detail // it performs no better than log(1+x): which is to say not very well at all. // template -T log1p_imp(T const & x, const Policy& pol, const mpl::int_<0>&) +BOOST_GPU_ENABLED T log1p_imp(T const & x, const Policy& pol, const mpl::int_<0>&) { // The function returns the natural logarithm of 1 + x. typedef typename tools::promote_args::type result_type; BOOST_MATH_STD_USING - static const char* function = "boost::math::log1p<%1%>(%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::log1p<%1%>(%1%)"; if((x < -1) || (boost::math::isnan)(x)) return policies::raise_domain_error( @@ -107,11 +107,11 @@ T log1p_imp(T const & x, const Policy& pol, const mpl::int_<0>&) } template -T log1p_imp(T const& x, const Policy& pol, const mpl::int_<53>&) +BOOST_GPU_ENABLED T log1p_imp(T const& x, const Policy& pol, const mpl::int_<53>&) { // The function returns the natural logarithm of 1 + x. 
BOOST_MATH_STD_USING - static const char* function = "boost::math::log1p<%1%>(%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::log1p<%1%>(%1%)"; if(x < -1) return policies::raise_domain_error( @@ -132,7 +132,7 @@ T log1p_imp(T const& x, const Policy& pol, const mpl::int_<53>&) // Expected Error Term: 1.843e-017 // Maximum Relative Change in Control Points: 8.138e-004 // Max Error found at double precision = 3.250766e-016 - static const T P[] = { + BOOST_MATH_GPU_STATIC const T P[] = { 0.15141069795941984e-16L, 0.35495104378055055e-15L, 0.33333333333332835L, @@ -142,7 +142,7 @@ T log1p_imp(T const& x, const Policy& pol, const mpl::int_<53>&) 0.13703234928513215L, 0.011294864812099712L }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { 1L, 3.7274719063011499L, 5.5387948649720334L, @@ -160,11 +160,11 @@ T log1p_imp(T const& x, const Policy& pol, const mpl::int_<53>&) } template -T log1p_imp(T const& x, const Policy& pol, const mpl::int_<64>&) +BOOST_GPU_ENABLED T log1p_imp(T const& x, const Policy& pol, const mpl::int_<64>&) { // The function returns the natural logarithm of 1 + x. 
BOOST_MATH_STD_USING - static const char* function = "boost::math::log1p<%1%>(%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::log1p<%1%>(%1%)"; if(x < -1) return policies::raise_domain_error( @@ -185,7 +185,7 @@ T log1p_imp(T const& x, const Policy& pol, const mpl::int_<64>&) // Expected Error Term: 8.088e-20 // Maximum Relative Change in Control Points: 9.648e-05 // Max Error found at long double precision = 2.242324e-19 - static const T P[] = { + BOOST_MATH_GPU_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.807533446680736736712e-19), BOOST_MATH_BIG_CONSTANT(T, 64, -0.490881544804798926426e-18), BOOST_MATH_BIG_CONSTANT(T, 64, 0.333333333333333373941), @@ -196,7 +196,7 @@ T log1p_imp(T const& x, const Policy& pol, const mpl::int_<64>&) BOOST_MATH_BIG_CONSTANT(T, 64, 0.0706537026422828914622), BOOST_MATH_BIG_CONSTANT(T, 64, 0.00441709903782239229447) }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 4.26423872346263928361), BOOST_MATH_BIG_CONSTANT(T, 64, 7.48189472704477708962), @@ -215,11 +215,11 @@ T log1p_imp(T const& x, const Policy& pol, const mpl::int_<64>&) } template -T log1p_imp(T const& x, const Policy& pol, const mpl::int_<24>&) +BOOST_GPU_ENABLED T log1p_imp(T const& x, const Policy& pol, const mpl::int_<24>&) { // The function returns the natural logarithm of 1 + x. 
BOOST_MATH_STD_USING - static const char* function = "boost::math::log1p<%1%>(%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::log1p<%1%>(%1%)"; if(x < -1) return policies::raise_domain_error( @@ -241,13 +241,13 @@ T log1p_imp(T const& x, const Policy& pol, const mpl::int_<24>&) // Maximum Relative Change in Control Points: 2.509e-04 // Max Error found at double precision = 6.910422e-08 // Max Error found at float precision = 8.357242e-08 - static const T P[] = { + BOOST_MATH_GPU_STATIC const T P[] = { -0.671192866803148236519e-7L, 0.119670999140731844725e-6L, 0.333339469182083148598L, 0.237827183019664122066L }; - static const T Q[] = { + BOOST_MATH_GPU_STATIC const T Q[] = { 1L, 1.46348272586988539733L, 0.497859871350117338894L, @@ -278,9 +278,11 @@ struct log1p_initializer void force_instantiate()const{} }; static const init initializer; - static void force_instantiate() + static BOOST_GPU_ENABLED void force_instantiate() { +#ifndef __CUDA_ARCH__ initializer.force_instantiate(); +#endif } }; @@ -291,7 +293,7 @@ const typename log1p_initializer::init log1p_initializer -inline typename tools::promote_args::type log1p(T x, const Policy&) +inline BOOST_GPU_ENABLED typename tools::promote_args::type log1p(T x, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -351,7 +353,7 @@ inline long double log1p(long double z) #if defined(BOOST_HAS_LOG1P) && !(defined(__osf__) && defined(__DECCXX_VER)) # ifdef BOOST_MATH_USE_C99 template -inline float log1p(float x, const Policy& pol) +inline BOOST_GPU_ENABLED float log1p(float x, const Policy& pol) { if(x < -1) return policies::raise_domain_error( @@ -363,7 +365,7 @@ inline float log1p(float x, const Policy& pol) } #ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS template -inline long double log1p(long double x, const Policy& pol) +inline BOOST_GPU_ENABLED long double log1p(long double x, const Policy& pol) { if(x < -1) return 
policies::raise_domain_error( @@ -376,7 +378,7 @@ inline long double log1p(long double x, const Policy& pol) #endif #else template -inline float log1p(float x, const Policy& pol) +inline BOOST_GPU_ENABLED float log1p(float x, const Policy& pol) { if(x < -1) return policies::raise_domain_error( @@ -388,7 +390,7 @@ inline float log1p(float x, const Policy& pol) } #endif template -inline double log1p(double x, const Policy& pol) +inline BOOST_GPU_ENABLED double log1p(double x, const Policy& pol) { if(x < -1) return policies::raise_domain_error( @@ -405,7 +407,7 @@ inline double log1p(double x, const Policy& pol) // Currently tested with VC8 and Intel 9.1. // template -inline double log1p(double x, const Policy& pol) +inline BOOST_GPU_ENABLED double log1p(double x, const Policy& pol) { if(x < -1) return policies::raise_domain_error( @@ -420,7 +422,7 @@ inline double log1p(double x, const Policy& pol) return ::log(u)*(x/(u-1.0)); } template -inline float log1p(float x, const Policy& pol) +inline BOOST_GPU_ENABLED float log1p(float x, const Policy& pol) { return static_cast(boost::math::log1p(static_cast(x), pol)); } @@ -430,7 +432,7 @@ inline float log1p(float x, const Policy& pol) // Needs more investigation. 
// template -inline long double log1p(long double x, const Policy& pol) +inline BOOST_GPU_ENABLED long double log1p(long double x, const Policy& pol) { if(x < -1) return policies::raise_domain_error( @@ -448,7 +450,7 @@ inline long double log1p(long double x, const Policy& pol) #endif template -inline typename tools::promote_args::type log1p(T x) +inline BOOST_GPU_ENABLED typename tools::promote_args::type log1p(T x) { return boost::math::log1p(x, policies::policy<>()); } @@ -456,12 +458,12 @@ inline typename tools::promote_args::type log1p(T x) // Compute log(1+x)-x: // template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type log1pmx(T x, const Policy& pol) { typedef typename tools::promote_args::type result_type; BOOST_MATH_STD_USING - static const char* function = "boost::math::log1pmx<%1%>(%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::log1pmx<%1%>(%1%)"; if(x < -1) return policies::raise_domain_error( @@ -491,7 +493,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type log1pmx(T x) +inline BOOST_GPU_ENABLED typename tools::promote_args::type log1pmx(T x) { return log1pmx(x, policies::policy<>()); } diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp index 4f44f56113..610927ea83 100644 --- a/include/boost/math/special_functions/math_fwd.hpp +++ b/include/boost/math/special_functions/math_fwd.hpp @@ -40,39 +40,39 @@ namespace boost // Beta functions. template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type beta(RT1 a, RT2 b); // Beta function (2 arguments). template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type beta(RT1 a, RT2 b, A x); // Beta function (3 arguments). 
template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type beta(RT1 a, RT2 b, RT3 x, const Policy& pol); // Beta function (3 arguments). template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type betac(RT1 a, RT2 b, RT3 x); template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type betac(RT1 a, RT2 b, RT3 x, const Policy& pol); template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ibeta(RT1 a, RT2 b, RT3 x); // Incomplete beta function. template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ibeta(RT1 a, RT2 b, RT3 x, const Policy& pol); // Incomplete beta function. template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ibetac(RT1 a, RT2 b, RT3 x); // Incomplete beta complement function. template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ibetac(RT1 a, RT2 b, RT3 x, const Policy& pol); // Incomplete beta complement function. template @@ -140,39 +140,39 @@ namespace boost ibetac_invb(RT1 a, RT2 b, RT3 q, const Policy&); // Incomplete beta complement inverse function. template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ibeta_derivative(RT1 a, RT2 b, RT3 x); // derivative of incomplete beta template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ibeta_derivative(RT1 a, RT2 b, RT3 x, const Policy& pol); // derivative of incomplete beta // Binomial: template - T binomial_coefficient(unsigned n, unsigned k, const Policy& pol); + BOOST_GPU_ENABLED T binomial_coefficient(unsigned n, unsigned k, const Policy& pol); template - T binomial_coefficient(unsigned n, unsigned k); + BOOST_GPU_ENABLED T binomial_coefficient(unsigned n, unsigned k); // erf & erfc error functions. 
template // Error function. - typename tools::promote_args::type erf(RT z); + BOOST_GPU_ENABLED typename tools::promote_args::type erf(RT z); template // Error function. - typename tools::promote_args::type erf(RT z, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type erf(RT z, const Policy&); template // Error function complement. - typename tools::promote_args::type erfc(RT z); + BOOST_GPU_ENABLED typename tools::promote_args::type erfc(RT z); template // Error function complement. - typename tools::promote_args::type erfc(RT z, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type erfc(RT z, const Policy&); template // Error function inverse. - typename tools::promote_args::type erf_inv(RT z); + BOOST_GPU_ENABLED typename tools::promote_args::type erf_inv(RT z); template // Error function inverse. - typename tools::promote_args::type erf_inv(RT z, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type erf_inv(RT z, const Policy& pol); template // Error function complement inverse. - typename tools::promote_args::type erfc_inv(RT z); + BOOST_GPU_ENABLED typename tools::promote_args::type erfc_inv(RT z); template // Error function complement inverse. 
- typename tools::promote_args::type erfc_inv(RT z, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type erfc_inv(RT z, const Policy& pol); // Polynomials: template @@ -313,83 +313,83 @@ namespace boost // Elliptic integrals: template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rf(T1 x, T2 y, T3 z); template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rf(T1 x, T2 y, T3 z, const Policy& pol); template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rd(T1 x, T2 y, T3 z); template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rd(T1 x, T2 y, T3 z, const Policy& pol); template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rc(T1 x, T2 y); template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rc(T1 x, T2 y, const Policy& pol); template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rj(T1 x, T2 y, T3 z, T4 p); template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rj(T1 x, T2 y, T3 z, T4 p, const Policy& pol); template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rg(T1 x, T2 y, T3 z); template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_rg(T1 x, T2 y, T3 z, const Policy& pol); template - typename tools::promote_args::type ellint_2(T k); + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_2(T k); template - typename tools::promote_args::type ellint_2(T1 k, T2 phi); + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_2(T1 k, T2 phi); template - typename tools::promote_args::type ellint_2(T1 k, T2 phi, const 
Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol); template - typename tools::promote_args::type ellint_1(T k); + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_1(T k); template - typename tools::promote_args::type ellint_1(T1 k, T2 phi); + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_1(T1 k, T2 phi); template - typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol); template - typename tools::promote_args::type ellint_d(T k); + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_d(T k); template - typename tools::promote_args::type ellint_d(T1 k, T2 phi); + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_d(T1 k, T2 phi); template - typename tools::promote_args::type ellint_d(T1 k, T2 phi, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_d(T1 k, T2 phi, const Policy& pol); template - typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi); + BOOST_GPU_ENABLED typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi); template - typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi, const Policy& pol); template - typename tools::promote_args::type heuman_lambda(T1 k, T2 phi); + BOOST_GPU_ENABLED typename tools::promote_args::type heuman_lambda(T1 k, T2 phi); template - typename tools::promote_args::type heuman_lambda(T1 k, T2 phi, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type heuman_lambda(T1 k, T2 phi, const Policy& pol); namespace detail{ @@ -407,104 +407,104 @@ namespace boost template - typename detail::ellint_3_result::type ellint_3(T1 k, T2 v, T3 phi); + BOOST_GPU_ENABLED typename detail::ellint_3_result::type ellint_3(T1 k, T2 v, T3 phi); template - typename 
tools::promote_args::type ellint_3(T1 k, T2 v, T3 phi, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 phi, const Policy& pol); template - typename tools::promote_args::type ellint_3(T1 k, T2 v); + BOOST_GPU_ENABLED typename tools::promote_args::type ellint_3(T1 k, T2 v); // Factorial functions. // Note: not for integral types, at present. template struct max_factorial; template - RT factorial(unsigned int); + BOOST_GPU_ENABLED RT factorial(unsigned int); template - RT factorial(unsigned int, const Policy& pol); + BOOST_GPU_ENABLED RT factorial(unsigned int, const Policy& pol); template - RT unchecked_factorial(unsigned int BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(RT)); + BOOST_GPU_ENABLED RT unchecked_factorial(unsigned int BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(RT)); template - RT double_factorial(unsigned i); + BOOST_GPU_ENABLED RT double_factorial(unsigned i); template - RT double_factorial(unsigned i, const Policy& pol); + BOOST_GPU_ENABLED RT double_factorial(unsigned i, const Policy& pol); template - typename tools::promote_args::type falling_factorial(RT x, unsigned n); + BOOST_GPU_ENABLED typename tools::promote_args::type falling_factorial(RT x, unsigned n); template - typename tools::promote_args::type falling_factorial(RT x, unsigned n, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type falling_factorial(RT x, unsigned n, const Policy& pol); template - typename tools::promote_args::type rising_factorial(RT x, int n); + BOOST_GPU_ENABLED typename tools::promote_args::type rising_factorial(RT x, int n); template - typename tools::promote_args::type rising_factorial(RT x, int n, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type rising_factorial(RT x, int n, const Policy& pol); // Gamma functions. 
template - typename tools::promote_args::type tgamma(RT z); + BOOST_GPU_ENABLED typename tools::promote_args::type tgamma(RT z); template - typename tools::promote_args::type tgamma1pm1(RT z); + BOOST_GPU_ENABLED typename tools::promote_args::type tgamma1pm1(RT z); template - typename tools::promote_args::type tgamma1pm1(RT z, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type tgamma1pm1(RT z, const Policy& pol); template - typename tools::promote_args::type tgamma(RT1 a, RT2 z); + BOOST_GPU_ENABLED typename tools::promote_args::type tgamma(RT1 a, RT2 z); template - typename tools::promote_args::type tgamma(RT1 a, RT2 z, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type tgamma(RT1 a, RT2 z, const Policy& pol); template - typename tools::promote_args::type lgamma(RT z, int* sign); + BOOST_GPU_ENABLED typename tools::promote_args::type lgamma(RT z, int* sign); template - typename tools::promote_args::type lgamma(RT z, int* sign, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type lgamma(RT z, int* sign, const Policy& pol); template - typename tools::promote_args::type lgamma(RT x); + BOOST_GPU_ENABLED typename tools::promote_args::type lgamma(RT x); template - typename tools::promote_args::type lgamma(RT x, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type lgamma(RT x, const Policy& pol); template - typename tools::promote_args::type tgamma_lower(RT1 a, RT2 z); + BOOST_GPU_ENABLED typename tools::promote_args::type tgamma_lower(RT1 a, RT2 z); template - typename tools::promote_args::type tgamma_lower(RT1 a, RT2 z, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type tgamma_lower(RT1 a, RT2 z, const Policy&); template - typename tools::promote_args::type gamma_q(RT1 a, RT2 z); + BOOST_GPU_ENABLED typename tools::promote_args::type gamma_q(RT1 a, RT2 z); template - typename tools::promote_args::type gamma_q(RT1 a, RT2 z, const Policy&); + BOOST_GPU_ENABLED 
typename tools::promote_args::type gamma_q(RT1 a, RT2 z, const Policy&); template - typename tools::promote_args::type gamma_p(RT1 a, RT2 z); + BOOST_GPU_ENABLED typename tools::promote_args::type gamma_p(RT1 a, RT2 z); template - typename tools::promote_args::type gamma_p(RT1 a, RT2 z, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type gamma_p(RT1 a, RT2 z, const Policy&); template - typename tools::promote_args::type tgamma_delta_ratio(T1 z, T2 delta); + BOOST_GPU_ENABLED typename tools::promote_args::type tgamma_delta_ratio(T1 z, T2 delta); template - typename tools::promote_args::type tgamma_delta_ratio(T1 z, T2 delta, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type tgamma_delta_ratio(T1 z, T2 delta, const Policy&); template - typename tools::promote_args::type tgamma_ratio(T1 a, T2 b); + BOOST_GPU_ENABLED typename tools::promote_args::type tgamma_ratio(T1 a, T2 b); template - typename tools::promote_args::type tgamma_ratio(T1 a, T2 b, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type tgamma_ratio(T1 a, T2 b, const Policy&); template - typename tools::promote_args::type gamma_p_derivative(T1 a, T2 x); + BOOST_GPU_ENABLED typename tools::promote_args::type gamma_p_derivative(T1 a, T2 x); template - typename tools::promote_args::type gamma_p_derivative(T1 a, T2 x, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type gamma_p_derivative(T1 a, T2 x, const Policy&); // gamma inverse. template @@ -563,39 +563,39 @@ namespace boost // cbrt - cube root. 
template - typename tools::promote_args::type cbrt(RT z); + BOOST_GPU_ENABLED typename tools::promote_args::type cbrt(RT z); template - typename tools::promote_args::type cbrt(RT z, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type cbrt(RT z, const Policy&); // log1p is log(x + 1) template - typename tools::promote_args::type log1p(T); + BOOST_GPU_ENABLED typename tools::promote_args::type log1p(T); template - typename tools::promote_args::type log1p(T, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type log1p(T, const Policy&); // log1pmx is log(x + 1) - x template - typename tools::promote_args::type log1pmx(T); + BOOST_GPU_ENABLED typename tools::promote_args::type log1pmx(T); template - typename tools::promote_args::type log1pmx(T, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type log1pmx(T, const Policy&); // Exp (x) minus 1 functions. template - typename tools::promote_args::type expm1(T); + BOOST_GPU_ENABLED typename tools::promote_args::type expm1(T); template - typename tools::promote_args::type expm1(T, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type expm1(T, const Policy&); // Power - 1 template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type powm1(const T1 a, const T2 z); template - typename tools::promote_args::type + BOOST_GPU_ENABLED typename tools::promote_args::type powm1(const T1 a, const T2 z, const Policy&); // sqrt(1+x) - 1 @@ -607,10 +607,10 @@ namespace boost // sinus cardinals: template - typename tools::promote_args::type sinc_pi(T x); + BOOST_GPU_ENABLED typename tools::promote_args::type sinc_pi(T x); template - typename tools::promote_args::type sinc_pi(T x, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type sinc_pi(T x, const Policy&); template typename tools::promote_args::type sinhc_pi(T x); @@ -620,22 +620,22 @@ namespace boost // inverse hyperbolics: template - typename 
tools::promote_args::type asinh(T x); + BOOST_GPU_ENABLED typename tools::promote_args::type asinh(T x); template - typename tools::promote_args::type asinh(T x, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type asinh(T x, const Policy&); template - typename tools::promote_args::type acosh(T x); + BOOST_GPU_ENABLED typename tools::promote_args::type acosh(T x); template - typename tools::promote_args::type acosh(T x, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type acosh(T x, const Policy&); template - typename tools::promote_args::type atanh(T x); + BOOST_GPU_ENABLED typename tools::promote_args::type atanh(T x); template - typename tools::promote_args::type atanh(T x, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type atanh(T x, const Policy&); namespace detail{ @@ -861,43 +861,43 @@ namespace boost const Policy&); template - typename tools::promote_args::type sin_pi(T x, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type sin_pi(T x, const Policy&); template - typename tools::promote_args::type sin_pi(T x); + BOOST_GPU_ENABLED typename tools::promote_args::type sin_pi(T x); template - typename tools::promote_args::type cos_pi(T x, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type cos_pi(T x, const Policy&); template - typename tools::promote_args::type cos_pi(T x); + BOOST_GPU_ENABLED typename tools::promote_args::type cos_pi(T x); template - int fpclassify BOOST_NO_MACRO_EXPAND(T t); + BOOST_GPU_ENABLED int fpclassify BOOST_NO_MACRO_EXPAND(T t); template - bool isfinite BOOST_NO_MACRO_EXPAND(T z); + BOOST_GPU_ENABLED bool isfinite BOOST_NO_MACRO_EXPAND(T z); template - bool isinf BOOST_NO_MACRO_EXPAND(T t); + BOOST_GPU_ENABLED bool isinf BOOST_NO_MACRO_EXPAND(T t); template - bool isnan BOOST_NO_MACRO_EXPAND(T t); + BOOST_GPU_ENABLED bool isnan BOOST_NO_MACRO_EXPAND(T t); template - bool isnormal BOOST_NO_MACRO_EXPAND(T t); + BOOST_GPU_ENABLED bool isnormal 
BOOST_NO_MACRO_EXPAND(T t); template - int signbit BOOST_NO_MACRO_EXPAND(T x); + BOOST_GPU_ENABLED int signbit BOOST_NO_MACRO_EXPAND(T x); template - int sign BOOST_NO_MACRO_EXPAND(const T& z); + BOOST_GPU_ENABLED int sign BOOST_NO_MACRO_EXPAND(const T& z); template - typename tools::promote_args_permissive::type copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y); + BOOST_GPU_ENABLED typename tools::promote_args_permissive::type copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y); template - typename tools::promote_args_permissive::type changesign BOOST_NO_MACRO_EXPAND(const T& z); + BOOST_GPU_ENABLED typename tools::promote_args_permissive::type changesign BOOST_NO_MACRO_EXPAND(const T& z); // Exponential integrals: namespace detail{ @@ -1019,32 +1019,32 @@ namespace boost // pow: template - typename tools::promote_args::type pow(T base, const Policy& policy); + BOOST_GPU_ENABLED BOOST_CONSTEXPR typename tools::promote_args::type pow(T base, const Policy& policy); template - typename tools::promote_args::type pow(T base); + BOOST_GPU_ENABLED BOOST_CONSTEXPR typename tools::promote_args::type pow(T base); // next: template - typename tools::promote_args::type nextafter(const T&, const U&, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type nextafter(const T&, const U&, const Policy&); template - typename tools::promote_args::type nextafter(const T&, const U&); + BOOST_GPU_ENABLED typename tools::promote_args::type nextafter(const T&, const U&); template - typename tools::promote_args::type float_next(const T&, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type float_next(const T&, const Policy&); template - typename tools::promote_args::type float_next(const T&); + BOOST_GPU_ENABLED typename tools::promote_args::type float_next(const T&); template - typename tools::promote_args::type float_prior(const T&, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type float_prior(const T&, const Policy&); 
template - typename tools::promote_args::type float_prior(const T&); + BOOST_GPU_ENABLED typename tools::promote_args::type float_prior(const T&); template - typename tools::promote_args::type float_distance(const T&, const U&, const Policy&); + BOOST_GPU_ENABLED typename tools::promote_args::type float_distance(const T&, const U&, const Policy&); template - typename tools::promote_args::type float_distance(const T&, const U&); + BOOST_GPU_ENABLED typename tools::promote_args::type float_distance(const T&, const U&); template - typename tools::promote_args::type float_advance(T val, int distance, const Policy& pol); + BOOST_GPU_ENABLED typename tools::promote_args::type float_advance(T val, int distance, const Policy& pol); template - typename tools::promote_args::type float_advance(const T& val, int distance); + BOOST_GPU_ENABLED typename tools::promote_args::type float_advance(const T& val, int distance); template typename tools::promote_args::type ulp(const T& val, const Policy& pol); diff --git a/include/boost/math/special_functions/modf.hpp b/include/boost/math/special_functions/modf.hpp index 3ce74e7aa3..20321664d7 100644 --- a/include/boost/math/special_functions/modf.hpp +++ b/include/boost/math/special_functions/modf.hpp @@ -17,50 +17,50 @@ namespace boost{ namespace math{ template -inline T modf(const T& v, T* ipart, const Policy& pol) +inline BOOST_GPU_ENABLED T modf(const T& v, T* ipart, const Policy& pol) { *ipart = trunc(v, pol); return v - *ipart; } template -inline T modf(const T& v, T* ipart) +inline BOOST_GPU_ENABLED T modf(const T& v, T* ipart) { return modf(v, ipart, policies::policy<>()); } template -inline T modf(const T& v, int* ipart, const Policy& pol) +inline BOOST_GPU_ENABLED T modf(const T& v, int* ipart, const Policy& pol) { *ipart = itrunc(v, pol); return v - *ipart; } template -inline T modf(const T& v, int* ipart) +inline BOOST_GPU_ENABLED T modf(const T& v, int* ipart) { return modf(v, ipart, policies::policy<>()); } template 
-inline T modf(const T& v, long* ipart, const Policy& pol) +inline BOOST_GPU_ENABLED T modf(const T& v, long* ipart, const Policy& pol) { *ipart = ltrunc(v, pol); return v - *ipart; } template -inline T modf(const T& v, long* ipart) +inline BOOST_GPU_ENABLED T modf(const T& v, long* ipart) { return modf(v, ipart, policies::policy<>()); } #ifdef BOOST_HAS_LONG_LONG template -inline T modf(const T& v, boost::long_long_type* ipart, const Policy& pol) +inline BOOST_GPU_ENABLED T modf(const T& v, boost::long_long_type* ipart, const Policy& pol) { *ipart = lltrunc(v, pol); return v - *ipart; } template -inline T modf(const T& v, boost::long_long_type* ipart) +inline BOOST_GPU_ENABLED T modf(const T& v, boost::long_long_type* ipart) { return modf(v, ipart, policies::policy<>()); } diff --git a/include/boost/math/special_functions/next.hpp b/include/boost/math/special_functions/next.hpp index a63983e1c3..5b1fd0f682 100644 --- a/include/boost/math/special_functions/next.hpp +++ b/include/boost/math/special_functions/next.hpp @@ -80,14 +80,14 @@ inline T normalize_value(const T& val, const mpl::true_&) } template -inline T get_smallest_value(mpl::true_ const&) +inline BOOST_GPU_ENABLED T get_smallest_value(mpl::true_ const&) { // // numeric_limits lies about denorms being present - particularly // when this can be turned on or off at runtime, as is the case // when using the SSE2 registers in DAZ or FTZ mode. // - static const T m = std::numeric_limits::denorm_min(); + BOOST_MATH_GPU_STATIC const T m = std::numeric_limits::denorm_min(); #ifdef BOOST_MATH_CHECK_SSE2 return (_mm_getcsr() & (_MM_FLUSH_ZERO_ON | 0x40)) ? 
tools::min_value() : m;; #else @@ -96,15 +96,17 @@ inline T get_smallest_value(mpl::true_ const&) } template -inline T get_smallest_value(mpl::false_ const&) +inline BOOST_GPU_ENABLED T get_smallest_value(mpl::false_ const&) { return tools::min_value(); } template -inline T get_smallest_value() +inline BOOST_GPU_ENABLED T get_smallest_value() { -#if defined(BOOST_MSVC) && (BOOST_MSVC <= 1310) +#ifdef __CUDA_ARCH__ + return get_smallest_value(mpl::bool_()); +#elif defined(BOOST_MSVC) && (BOOST_MSVC <= 1310) return get_smallest_value(mpl::bool_::is_specialized && (std::numeric_limits::has_denorm == 1)>()); #else return get_smallest_value(mpl::bool_::is_specialized && (std::numeric_limits::has_denorm == std::denorm_present)>()); @@ -116,25 +118,25 @@ inline T get_smallest_value() // we calculate the value of the least-significant-bit: // template -T get_min_shift_value(); +BOOST_GPU_ENABLED T get_min_shift_value(); template struct min_shift_initializer { struct init { - init() + BOOST_GPU_ENABLED init() { do_init(); } - static void do_init() + static BOOST_GPU_ENABLED void do_init() { get_min_shift_value(); } - void force_instantiate()const{} + BOOST_GPU_ENABLED void force_instantiate()const{} }; static const init initializer; - static void force_instantiate() + static BOOST_GPU_ENABLED void force_instantiate() { initializer.force_instantiate(); } @@ -144,13 +146,13 @@ template const typename min_shift_initializer::init min_shift_initializer::initializer; template -inline T calc_min_shifted(const mpl::true_&) +inline BOOST_GPU_ENABLED T calc_min_shifted(const mpl::true_&) { BOOST_MATH_STD_USING return ldexp(tools::min_value(), tools::digits() + 1); } template -inline T calc_min_shifted(const mpl::false_&) +inline BOOST_GPU_ENABLED T calc_min_shifted(const mpl::false_&) { BOOST_STATIC_ASSERT(std::numeric_limits::is_specialized); BOOST_STATIC_ASSERT(std::numeric_limits::radix != 2); @@ -160,7 +162,7 @@ inline T calc_min_shifted(const mpl::false_&) template -inline T 
get_min_shift_value() +inline BOOST_GPU_ENABLED T get_min_shift_value() { static const T val = calc_min_shifted(mpl::bool_::is_specialized || std::numeric_limits::radix == 2>()); min_shift_initializer::force_instantiate(); @@ -169,11 +171,11 @@ inline T get_min_shift_value() } template -T float_next_imp(const T& val, const mpl::true_&, const Policy& pol) +BOOST_GPU_ENABLED T float_next_imp(const T& val, const mpl::true_&, const Policy& pol) { BOOST_MATH_STD_USING int expon; - static const char* function = "float_next<%1%>(%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "float_next<%1%>(%1%)"; int fpclass = (boost::math::fpclassify)(val); @@ -213,7 +215,7 @@ T float_next_imp(const T& val, const mpl::true_&, const Policy& pol) // Special version for some base other than 2: // template -T float_next_imp(const T& val, const mpl::false_&, const Policy& pol) +BOOST_GPU_ENABLED T float_next_imp(const T& val, const mpl::false_&, const Policy& pol) { BOOST_STATIC_ASSERT(std::numeric_limits::is_specialized); BOOST_STATIC_ASSERT(std::numeric_limits::radix != 2); @@ -231,7 +233,7 @@ T float_next_imp(const T& val, const mpl::false_&, const Policy& pol) return policies::raise_domain_error( function, "Argument must be finite, but got %1%", val, pol); - } +} if(val >= tools::max_value()) return policies::raise_overflow_error(function, 0, pol); @@ -247,7 +249,7 @@ T float_next_imp(const T& val, const mpl::false_&, const Policy& pol) // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set. 
// return scalbn(float_next(T(scalbn(val, 2 * std::numeric_limits::digits)), pol), -2 * std::numeric_limits::digits); - } +} expon = 1 + ilogb(val); if(-1 == scalbn(val, -expon) * std::numeric_limits::radix) @@ -261,7 +263,7 @@ T float_next_imp(const T& val, const mpl::false_&, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type float_next(const T& val, const Policy& pol) +inline BOOST_GPU_ENABLED typename tools::promote_args::type float_next(const T& val, const Policy& pol) { typedef typename tools::promote_args::type result_type; return detail::float_next_imp(detail::normalize_value(static_cast(val), typename detail::has_hidden_guard_digits::type()), mpl::bool_::is_specialized || (std::numeric_limits::radix == 2)>(), pol); @@ -276,7 +278,7 @@ inline typename tools::promote_args::type float_next(const T& val, const Poli template inline double float_next(const double& val, const Policy& pol) { - static const char* function = "float_next<%1%>(%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "float_next<%1%>(%1%)"; if(!(boost::math::isfinite)(val) && (val > 0)) return policies::raise_domain_error( @@ -291,7 +293,7 @@ inline double float_next(const double& val, const Policy& pol) #endif template -inline typename tools::promote_args::type float_next(const T& val) +inline BOOST_GPU_ENABLED typename tools::promote_args::type float_next(const T& val) { return float_next(val, policies::policy<>()); } @@ -299,11 +301,11 @@ inline typename tools::promote_args::type float_next(const T& val) namespace detail{ template -T float_prior_imp(const T& val, const mpl::true_&, const Policy& pol) +BOOST_GPU_ENABLED T float_prior_imp(const T& val, const mpl::true_&, const Policy& pol) { BOOST_MATH_STD_USING int expon; - static const char* function = "float_prior<%1%>(%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "float_prior<%1%>(%1%)"; int fpclass = (boost::math::fpclassify)(val); @@ -344,7 +346,7 @@ T float_prior_imp(const T& val, 
const mpl::true_&, const Policy& pol) // Special version for bases other than 2: // template -T float_prior_imp(const T& val, const mpl::false_&, const Policy& pol) +BOOST_GPU_ENABLED T float_prior_imp(const T& val, const mpl::false_&, const Policy& pol) { BOOST_STATIC_ASSERT(std::numeric_limits::is_specialized); BOOST_STATIC_ASSERT(std::numeric_limits::radix != 2); @@ -362,7 +364,7 @@ T float_prior_imp(const T& val, const mpl::false_&, const Policy& pol) return policies::raise_domain_error( function, "Argument must be finite, but got %1%", val, pol); - } +} if(val <= -tools::max_value()) return -policies::raise_overflow_error(function, 0, pol); @@ -378,7 +380,7 @@ T float_prior_imp(const T& val, const mpl::false_&, const Policy& pol) // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set. // return scalbn(float_prior(T(scalbn(val, 2 * std::numeric_limits::digits)), pol), -2 * std::numeric_limits::digits); - } +} expon = 1 + ilogb(val); T remain = scalbn(val, -expon); @@ -393,7 +395,7 @@ T float_prior_imp(const T& val, const mpl::false_&, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type float_prior(const T& val, const Policy& pol) +inline BOOST_GPU_ENABLED typename tools::promote_args::type float_prior(const T& val, const Policy& pol) { typedef typename tools::promote_args::type result_type; return detail::float_prior_imp(detail::normalize_value(static_cast(val), typename detail::has_hidden_guard_digits::type()), mpl::bool_::is_specialized || (std::numeric_limits::radix == 2)>(), pol); @@ -408,7 +410,7 @@ inline typename tools::promote_args::type float_prior(const T& val, const Pol template inline double float_prior(const double& val, const Policy& pol) { - static const char* function = "float_prior<%1%>(%1%)"; + BOOST_MATH_GPU_STATIC const char* function = "float_prior<%1%>(%1%)"; if(!(boost::math::isfinite)(val) && (val < 0)) return policies::raise_domain_error( @@ -423,20 +425,20 @@ 
inline double float_prior(const double& val, const Policy& pol) #endif template -inline typename tools::promote_args::type float_prior(const T& val) +inline BOOST_GPU_ENABLED typename tools::promote_args::type float_prior(const T& val) { return float_prior(val, policies::policy<>()); } template -inline typename tools::promote_args::type nextafter(const T& val, const U& direction, const Policy& pol) +inline BOOST_GPU_ENABLED typename tools::promote_args::type nextafter(const T& val, const U& direction, const Policy& pol) { typedef typename tools::promote_args::type result_type; return val < direction ? boost::math::float_next(val, pol) : val == direction ? val : boost::math::float_prior(val, pol); } template -inline typename tools::promote_args::type nextafter(const T& val, const U& direction) +inline BOOST_GPU_ENABLED typename tools::promote_args::type nextafter(const T& val, const U& direction) { return nextafter(val, direction, policies::policy<>()); } @@ -444,13 +446,13 @@ inline typename tools::promote_args::type nextafter(const T& val, const U& namespace detail{ template -T float_distance_imp(const T& a, const T& b, const mpl::true_&, const Policy& pol) +BOOST_GPU_ENABLED T float_distance_imp(const T& a, const T& b, const mpl::true_&, const Policy& pol) { BOOST_MATH_STD_USING // // Error handling: // - static const char* function = "float_distance<%1%>(%1%, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "float_distance<%1%>(%1%, %1%)"; if(!(boost::math::isfinite)(a)) return policies::raise_domain_error( function, @@ -480,8 +482,8 @@ T float_distance_imp(const T& a, const T& b, const mpl::true_&, const Policy& po if(a < 0) return float_distance(static_cast(-b), static_cast(-a), pol); - BOOST_ASSERT(a >= 0); - BOOST_ASSERT(b >= a); + BOOST_MATH_ASSERT(a >= 0); + BOOST_MATH_ASSERT(b >= a); int expon; // @@ -519,7 +521,7 @@ T float_distance_imp(const T& a, const T& b, const mpl::true_&, const Policy& po // T a2 = ldexp(a, tools::digits()); T b2 = ldexp(b, 
tools::digits()); - mb = -(std::min)(T(ldexp(upper, tools::digits())), b2); + mb = -BOOST_MATH_CUDA_SAFE_MIN(T(ldexp(upper, tools::digits())), b2); x = a2 + mb; z = x - a2; y = (a2 - (x - z)) + (mb - z); @@ -528,7 +530,7 @@ T float_distance_imp(const T& a, const T& b, const mpl::true_&, const Policy& po } else { - mb = -(std::min)(upper, b); + mb = -BOOST_MATH_CUDA_SAFE_MIN(upper, b); x = a + mb; z = x - a; y = (a - (x - z)) + (mb - z); @@ -542,14 +544,14 @@ T float_distance_imp(const T& a, const T& b, const mpl::true_&, const Policy& po // // Result must be an integer: // - BOOST_ASSERT(result == floor(result)); + BOOST_MATH_ASSERT(result == floor(result)); return result; } // float_distance_imp // // Special versions for bases other than 2: // template -T float_distance_imp(const T& a, const T& b, const mpl::false_&, const Policy& pol) +BOOST_GPU_ENABLED T float_distance_imp(const T& a, const T& b, const mpl::false_&, const Policy& pol) { BOOST_STATIC_ASSERT(std::numeric_limits::is_specialized); BOOST_STATIC_ASSERT(std::numeric_limits::radix != 2); @@ -610,7 +612,7 @@ T float_distance_imp(const T& a, const T& b, const mpl::false_&, const Policy& p T upper2 = scalbn(T(1), expon2 - 1); result = float_distance(upper2, b); result += (expon2 - expon - 1) * scalbn(T(1), std::numeric_limits::digits - 1); - } +} // // Use compensated double-double addition to avoid rounding // errors in the subtraction: @@ -626,7 +628,7 @@ T float_distance_imp(const T& a, const T& b, const mpl::false_&, const Policy& p // T a2 = scalbn(a, std::numeric_limits::digits); T b2 = scalbn(b, std::numeric_limits::digits); - mb = -(std::min)(T(scalbn(upper, std::numeric_limits::digits)), b2); + mb = -BOOST_MATH_CUDA_SAFE_MIN(T(scalbn(upper, std::numeric_limits::digits)), b2); x = a2 + mb; z = x - a2; y = (a2 - (x - z)) + (mb - z); @@ -635,7 +637,7 @@ T float_distance_imp(const T& a, const T& b, const mpl::false_&, const Policy& p } else { - mb = -(std::min)(upper, b); + mb = 
-BOOST_MATH_CUDA_SAFE_MIN(upper, b); x = a + mb; z = x - a; y = (a - (x - z)) + (mb - z); @@ -656,14 +658,14 @@ T float_distance_imp(const T& a, const T& b, const mpl::false_&, const Policy& p } // namespace detail template -inline typename tools::promote_args::type float_distance(const T& a, const U& b, const Policy& pol) +inline BOOST_GPU_ENABLED typename tools::promote_args::type float_distance(const T& a, const U& b, const Policy& pol) { typedef typename tools::promote_args::type result_type; return detail::float_distance_imp(detail::normalize_value(static_cast(a), typename detail::has_hidden_guard_digits::type()), detail::normalize_value(static_cast(b), typename detail::has_hidden_guard_digits::type()), mpl::bool_::is_specialized || (std::numeric_limits::radix == 2)>(), pol); } template -typename tools::promote_args::type float_distance(const T& a, const U& b) +inline BOOST_GPU_ENABLED typename tools::promote_args::type float_distance(const T& a, const U& b) { return boost::math::float_distance(a, b, policies::policy<>()); } @@ -671,13 +673,13 @@ typename tools::promote_args::type float_distance(const T& a, const U& b) namespace detail{ template -T float_advance_imp(T val, int distance, const mpl::true_&, const Policy& pol) +BOOST_GPU_ENABLED T float_advance_imp(T val, int distance, const mpl::true_&, const Policy& pol) { BOOST_MATH_STD_USING // // Error handling: // - static const char* function = "float_advance<%1%>(%1%, int)"; + BOOST_MATH_GPU_STATIC const char* function = "float_advance<%1%>(%1%, int)"; int fpclass = (boost::math::fpclassify)(val); @@ -754,7 +756,7 @@ T float_advance_imp(T val, int distance, const mpl::true_&, const Policy& pol) // Special version for bases other than 2: // template -T float_advance_imp(T val, int distance, const mpl::false_&, const Policy& pol) +BOOST_GPU_ENABLED T float_advance_imp(T val, int distance, const mpl::false_&, const Policy& pol) { BOOST_STATIC_ASSERT(std::numeric_limits::is_specialized); 
BOOST_STATIC_ASSERT(std::numeric_limits::radix != 2); @@ -791,7 +793,7 @@ T float_advance_imp(T val, int distance, const mpl::false_&, const Policy& pol) if(distance > 0) { do{ val = float_next(val, pol); } while(--distance); - } +} else { do{ val = float_prior(val, pol); } while(++distance); @@ -804,7 +806,7 @@ T float_advance_imp(T val, int distance, const mpl::false_&, const Policy& pol) if(val <= tools::min_value()) { limit = sign(T(distance)) * tools::min_value(); - } +} T limit_distance = float_distance(val, limit); while(fabs(limit_distance) < abs(distance)) { @@ -840,14 +842,14 @@ T float_advance_imp(T val, int distance, const mpl::false_&, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type float_advance(T val, int distance, const Policy& pol) +inline BOOST_GPU_ENABLED typename tools::promote_args::type float_advance(T val, int distance, const Policy& pol) { typedef typename tools::promote_args::type result_type; return detail::float_advance_imp(detail::normalize_value(static_cast(val), typename detail::has_hidden_guard_digits::type()), distance, mpl::bool_::is_specialized || (std::numeric_limits::radix == 2)>(), pol); } template -inline typename tools::promote_args::type float_advance(const T& val, int distance) +inline BOOST_GPU_ENABLED typename tools::promote_args::type float_advance(const T& val, int distance) { return boost::math::float_advance(val, distance, policies::policy<>()); } diff --git a/include/boost/math/special_functions/owens_t.hpp b/include/boost/math/special_functions/owens_t.hpp index 7fbd8918c0..a6c31e68c5 100644 --- a/include/boost/math/special_functions/owens_t.hpp +++ b/include/boost/math/special_functions/owens_t.hpp @@ -117,7 +117,7 @@ namespace boost { static const unsigned short ord[] = {2, 3, 4, 5, 7, 10, 12, 18, 10, 20, 30, 0, 4, 7, 8, 20, 0, 0}; // 18 entries - BOOST_ASSERT(icode<18); + BOOST_MATH_ASSERT(icode<18); return ord[icode]; } // unsigned short owens_t_get_order(const 
unsigned short icode, RealType, mpl::int<53> const&) @@ -128,7 +128,7 @@ namespace boost // method ================>>> {1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 4, 4, 4, 4, 5, 6} static const unsigned short ord[] = {3, 4, 5, 6, 8, 11, 13, 19, 10, 20, 30, 0, 7, 10, 11, 23, 0, 0}; // 18 entries - BOOST_ASSERT(icode<18); + BOOST_MATH_ASSERT(icode<18); return ord[icode]; } // unsigned short owens_t_get_order(const unsigned short icode, RealType, mpl::int<64> const&) @@ -254,7 +254,7 @@ namespace boost while( true ) { - BOOST_ASSERT(i < 21); + BOOST_MATH_ASSERT(i < 21); val += zi*c2[i]; if( m <= i ) // if( m < i+1 ) { @@ -326,7 +326,7 @@ namespace boost while( true ) { - BOOST_ASSERT(i < 31); + BOOST_MATH_ASSERT(i < 31); val += zi*c2[i]; if( m <= i ) // if( m < i+1 ) { @@ -425,7 +425,7 @@ namespace boost RealType val = 0; for(unsigned short i = 0; i < m; ++i) { - BOOST_ASSERT(i < 13); + BOOST_MATH_ASSERT(i < 13); const RealType r = static_cast(1) + as*pts[i]; val += wts[i] * exp( hs*r ) / r; } // for(unsigned short i = 0; i < m; ++i) @@ -497,7 +497,7 @@ namespace boost RealType val = 0; for(unsigned short i = 0; i < m; ++i) { - BOOST_ASSERT(i < 19); + BOOST_MATH_ASSERT(i < 19); const RealType r = 1 + as*pts[i]; val += wts[i] * exp( hs*r ) / r; } // for(unsigned short i = 0; i < m; ++i) diff --git a/include/boost/math/special_functions/pow.hpp b/include/boost/math/special_functions/pow.hpp index 494f721d05..07f4b24241 100644 --- a/include/boost/math/special_functions/pow.hpp +++ b/include/boost/math/special_functions/pow.hpp @@ -30,15 +30,17 @@ namespace math { namespace detail { + template + inline BOOST_GPU_ENABLED BOOST_CONSTEXPR T square(const T& val) { return val * val; } + template struct positive_power { template - static T result(T base) + static BOOST_GPU_ENABLED BOOST_CXX14_CONSTEXPR T result(T base) { - T power = positive_power::result(base); - return power * power; + return square(positive_power::result(base)); } }; @@ -46,10 +48,9 @@ template struct 
positive_power { template - static T result(T base) + static BOOST_GPU_ENABLED BOOST_CONSTEXPR T result(T base) { - T power = positive_power::result(base); - return base * power * power; + return base * square(positive_power::result(base)); } }; @@ -57,7 +58,7 @@ template <> struct positive_power<1, 1> { template - static T result(T base){ return base; } + static BOOST_GPU_ENABLED BOOST_CONSTEXPR T result(T base){ return base; } }; @@ -65,7 +66,7 @@ template struct power_if_positive { template - static T result(T base, const Policy&) + static BOOST_GPU_ENABLED BOOST_CONSTEXPR T result(T base, const Policy&) { return positive_power::result(base); } }; @@ -73,18 +74,13 @@ template struct power_if_positive { template - static T result(T base, const Policy& policy) + static BOOST_GPU_ENABLED BOOST_CONSTEXPR T result(T base, const Policy& policy) { - if (base == 0) - { - return policies::raise_overflow_error( + return base == 0 ? policies::raise_overflow_error( "boost::math::pow(%1%)", "Attempted to compute a negative power of 0", policy - ); - } - - return T(1) / positive_power<-N>::result(base); + ) : T(1) / positive_power<-N>::result(base); } }; @@ -92,20 +88,15 @@ template <> struct power_if_positive<0, true> { template - static T result(T base, const Policy& policy) + static BOOST_GPU_ENABLED BOOST_CONSTEXPR T result(T base, const Policy& policy) { - if (base == 0) - { - return policies::raise_indeterminate_result_error( + return base == 0 ? 
policies::raise_indeterminate_result_error( "boost::math::pow(%1%)", "The result of pow<0>(%1%) is undetermined", base, T(1), policy - ); - } - - return T(1); + ) : T(1); } }; @@ -126,15 +117,14 @@ struct select_power_if_positive template -inline typename tools::promote_args::type pow(T base, const Policy& policy) +inline BOOST_GPU_ENABLED BOOST_CONSTEXPR typename tools::promote_args::type pow(T base, const Policy& policy) { - typedef typename tools::promote_args::type result_type; - return detail::select_power_if_positive::type::result(static_cast(base), policy); + return detail::select_power_if_positive::type::result(static_cast::type>(base), policy); } template -inline typename tools::promote_args::type pow(T base) +inline BOOST_GPU_ENABLED BOOST_CONSTEXPR typename tools::promote_args::type pow(T base) { return pow(base, policies::policy<>()); } #ifdef BOOST_MSVC diff --git a/include/boost/math/special_functions/powm1.hpp b/include/boost/math/special_functions/powm1.hpp index 37db8746c8..71512af05d 100644 --- a/include/boost/math/special_functions/powm1.hpp +++ b/include/boost/math/special_functions/powm1.hpp @@ -21,10 +21,10 @@ namespace boost{ namespace math{ namespace detail{ template -inline T powm1_imp(const T x, const T y, const Policy& pol) +inline BOOST_GPU_ENABLED T powm1_imp(const T x, const T y, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "boost::math::powm1<%1%>(%1%, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::powm1<%1%>(%1%, %1%)"; if (x > 0) { @@ -54,7 +54,7 @@ inline T powm1_imp(const T x, const T y, const Policy& pol) } // detail template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename tools::promote_args::type powm1(const T1 a, const T2 z) { typedef typename tools::promote_args::type result_type; @@ -62,7 +62,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +inline BOOST_GPU_ENABLED typename 
tools::promote_args::type powm1(const T1 a, const T2 z, const Policy& pol) { typedef typename tools::promote_args::type result_type; diff --git a/include/boost/math/special_functions/round.hpp b/include/boost/math/special_functions/round.hpp index e21f7185d1..cb5990a675 100644 --- a/include/boost/math/special_functions/round.hpp +++ b/include/boost/math/special_functions/round.hpp @@ -20,7 +20,7 @@ namespace boost{ namespace math{ namespace detail{ template -inline typename tools::promote_args::type round(const T& v, const Policy& pol, const mpl::false_) +inline BOOST_GPU_ENABLED typename tools::promote_args::type round(const T& v, const Policy& pol, const mpl::false_) { BOOST_MATH_STD_USING typedef typename tools::promote_args::type result_type; @@ -52,7 +52,7 @@ inline typename tools::promote_args::type round(const T& v, const Policy& pol } } template -inline typename tools::promote_args::type round(const T& v, const Policy&, const mpl::true_) +inline BOOST_GPU_ENABLED typename tools::promote_args::type round(const T& v, const Policy&, const mpl::true_) { return v; } @@ -60,12 +60,12 @@ inline typename tools::promote_args::type round(const T& v, const Policy&, co } // namespace detail template -inline typename tools::promote_args::type round(const T& v, const Policy& pol) +inline BOOST_GPU_ENABLED typename tools::promote_args::type round(const T& v, const Policy& pol) { return detail::round(v, pol, mpl::bool_::value>()); } template -inline typename tools::promote_args::type round(const T& v) +inline BOOST_GPU_ENABLED typename tools::promote_args::type round(const T& v) { return round(v, policies::policy<>()); } @@ -79,31 +79,39 @@ inline typename tools::promote_args::type round(const T& v) // dependent lookup. See our concept archetypes for examples. 
// template -inline int iround(const T& v, const Policy& pol) +inline BOOST_GPU_ENABLED int iround(const T& v, const Policy& pol) { BOOST_MATH_STD_USING T r = boost::math::round(v, pol); +#ifdef __CUDA_ARCH__ + if((r > INT_MAX) || (r < INT_MIN)) +#else if((r > (std::numeric_limits::max)()) || (r < (std::numeric_limits::min)())) +#endif return static_cast(policies::raise_rounding_error("boost::math::iround<%1%>(%1%)", 0, v, 0, pol)); return static_cast(r); } template -inline int iround(const T& v) +inline BOOST_GPU_ENABLED int iround(const T& v) { return iround(v, policies::policy<>()); } template -inline long lround(const T& v, const Policy& pol) +inline BOOST_GPU_ENABLED long lround(const T& v, const Policy& pol) { BOOST_MATH_STD_USING T r = boost::math::round(v, pol); +#ifdef __CUDA_ARCH__ + if((r > LONG_MAX) || (r < LONG_MIN)) +#else if((r > (std::numeric_limits::max)()) || (r < (std::numeric_limits::min)())) +#endif return static_cast(policies::raise_rounding_error("boost::math::lround<%1%>(%1%)", 0, v, 0L, pol)); return static_cast(r); } template -inline long lround(const T& v) +inline BOOST_GPU_ENABLED long lround(const T& v) { return lround(v, policies::policy<>()); } @@ -111,16 +119,20 @@ inline long lround(const T& v) #ifdef BOOST_HAS_LONG_LONG template -inline boost::long_long_type llround(const T& v, const Policy& pol) +inline BOOST_GPU_ENABLED boost::long_long_type llround(const T& v, const Policy& pol) { BOOST_MATH_STD_USING T r = boost::math::round(v, pol); +#ifdef __CUDA_ARCH__ + if((r > LLONG_MAX) || (r < LLONG_MIN)) +#else if((r > (std::numeric_limits::max)()) || (r < (std::numeric_limits::min)())) +#endif return static_cast(policies::raise_rounding_error("boost::math::llround<%1%>(%1%)", 0, v, static_cast(0), pol)); return static_cast(r); } template -inline boost::long_long_type llround(const T& v) +inline BOOST_GPU_ENABLED boost::long_long_type llround(const T& v) { return llround(v, policies::policy<>()); } diff --git 
a/include/boost/math/special_functions/sign.hpp b/include/boost/math/special_functions/sign.hpp index 5cb21bac54..969c977603 100644 --- a/include/boost/math/special_functions/sign.hpp +++ b/include/boost/math/special_functions/sign.hpp @@ -25,7 +25,7 @@ namespace detail { #ifdef BOOST_MATH_USE_STD_FPCLASSIFY template - inline int signbit_impl(T x, native_tag const&) + inline BOOST_GPU_ENABLED int signbit_impl(T x, native_tag const&) { return (std::signbit)(x) ? 1 : 0; } @@ -35,13 +35,13 @@ namespace detail { // signed zero or NaN. template - inline int signbit_impl(T x, generic_tag const&) + inline BOOST_GPU_ENABLED int signbit_impl(T x, generic_tag const&) { return x < 0; } template - inline int signbit_impl(T x, generic_tag const&) + inline BOOST_GPU_ENABLED int signbit_impl(T x, generic_tag const&) { return x < 0; } @@ -54,18 +54,18 @@ namespace detail { // can occur since the exponents are the same magnitude // for the two types: // - inline int signbit_impl(long double x, generic_tag const&) + inline BOOST_GPU_ENABLED int signbit_impl(long double x, generic_tag const&) { return (boost::math::signbit)(static_cast(x)); } - inline int signbit_impl(long double x, generic_tag const&) + inline BOOST_GPU_ENABLED int signbit_impl(long double x, generic_tag const&) { return (boost::math::signbit)(static_cast(x)); } #endif template - inline int signbit_impl(T x, ieee_copy_all_bits_tag const&) + inline BOOST_GPU_ENABLED int signbit_impl(T x, ieee_copy_all_bits_tag const&) { typedef BOOST_DEDUCED_TYPENAME fp_traits::type traits; @@ -75,7 +75,7 @@ namespace detail { } template - inline int signbit_impl(T x, ieee_copy_leading_bits_tag const&) + inline BOOST_GPU_ENABLED int signbit_impl(T x, ieee_copy_leading_bits_tag const&) { typedef BOOST_DEDUCED_TYPENAME fp_traits::type traits; @@ -91,13 +91,13 @@ namespace detail { // signed zero or NaN. 
template - inline T (changesign_impl)(T x, generic_tag const&) + inline BOOST_GPU_ENABLED T (changesign_impl)(T x, generic_tag const&) { return -x; } template - inline T (changesign_impl)(T x, generic_tag const&) + inline BOOST_GPU_ENABLED T (changesign_impl)(T x, generic_tag const&) { return -x; } @@ -107,14 +107,14 @@ namespace detail { // in this case we need to change the sign of both // components of the "double double": // - inline long double (changesign_impl)(long double x, generic_tag const&) + inline BOOST_GPU_ENABLED long double (changesign_impl)(long double x, generic_tag const&) { double* pd = reinterpret_cast(&x); pd[0] = boost::math::changesign(pd[0]); pd[1] = boost::math::changesign(pd[1]); return x; } - inline long double (changesign_impl)(long double x, generic_tag const&) + inline BOOST_GPU_ENABLED long double (changesign_impl)(long double x, generic_tag const&) { double* pd = reinterpret_cast(&x); pd[0] = boost::math::changesign(pd[0]); @@ -124,7 +124,7 @@ namespace detail { #endif template - inline T changesign_impl(T x, ieee_copy_all_bits_tag const&) + inline BOOST_GPU_ENABLED T changesign_impl(T x, ieee_copy_all_bits_tag const&) { typedef BOOST_DEDUCED_TYPENAME fp_traits::sign_change_type traits; @@ -136,7 +136,7 @@ namespace detail { } template - inline T (changesign_impl)(T x, ieee_copy_leading_bits_tag const&) + inline BOOST_GPU_ENABLED T (changesign_impl)(T x, ieee_copy_leading_bits_tag const&) { typedef BOOST_DEDUCED_TYPENAME fp_traits::sign_change_type traits; @@ -150,7 +150,7 @@ namespace detail { } // namespace detail -template int (signbit)(T x) +template BOOST_GPU_ENABLED int (signbit)(T x) { typedef typename detail::fp_traits::type traits; typedef typename traits::method method; @@ -160,12 +160,12 @@ template int (signbit)(T x) } template -inline int sign BOOST_NO_MACRO_EXPAND(const T& z) +inline BOOST_GPU_ENABLED int sign BOOST_NO_MACRO_EXPAND(const T& z) { return (z == 0) ? 0 : (boost::math::signbit)(z) ? 
-1 : 1; } -template typename tools::promote_args_permissive::type (changesign)(const T& x) +template BOOST_GPU_ENABLED typename tools::promote_args_permissive::type (changesign)(const T& x) { //!< \brief return unchanged binary pattern of x, except for change of sign bit. typedef typename detail::fp_traits::sign_change_type traits; typedef typename traits::method method; @@ -176,7 +176,7 @@ template typename tools::promote_args_permissive::type (changesign) } template -inline typename tools::promote_args_permissive::type +inline BOOST_GPU_ENABLED typename tools::promote_args_permissive::type copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y) { BOOST_MATH_STD_USING diff --git a/include/boost/math/special_functions/sin_pi.hpp b/include/boost/math/special_functions/sin_pi.hpp index ae6b3e7442..e77efdf919 100644 --- a/include/boost/math/special_functions/sin_pi.hpp +++ b/include/boost/math/special_functions/sin_pi.hpp @@ -20,7 +20,7 @@ namespace boost{ namespace math{ namespace detail{ template -T sin_pi_imp(T x, const Policy& pol) +BOOST_GPU_ENABLED T sin_pi_imp(T x, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names if(x < 0) @@ -53,7 +53,7 @@ T sin_pi_imp(T x, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type sin_pi(T x, const Policy&) +inline BOOST_GPU_ENABLED typename tools::promote_args::type sin_pi(T x, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -67,7 +67,7 @@ inline typename tools::promote_args::type sin_pi(T x, const Policy&) } template -inline typename tools::promote_args::type sin_pi(T x) +inline BOOST_GPU_ENABLED typename tools::promote_args::type sin_pi(T x) { return boost::math::sin_pi(x, policies::policy<>()); } diff --git a/include/boost/math/special_functions/sinc.hpp b/include/boost/math/special_functions/sinc.hpp index 84fbf0e324..90792cc059 100644 --- a/include/boost/math/special_functions/sinc.hpp +++ 
b/include/boost/math/special_functions/sinc.hpp @@ -39,7 +39,7 @@ namespace boost // This is the "Sinus Cardinal" of index Pi. template - inline T sinc_pi_imp(const T x) + inline BOOST_GPU_ENABLED T sinc_pi_imp(const T x) { BOOST_MATH_STD_USING @@ -77,14 +77,14 @@ namespace boost } // namespace detail template - inline typename tools::promote_args::type sinc_pi(T x) + inline BOOST_GPU_ENABLED typename tools::promote_args::type sinc_pi(T x) { typedef typename tools::promote_args::type result_type; return detail::sinc_pi_imp(static_cast(x)); } template - inline typename tools::promote_args::type sinc_pi(T x, const Policy&) + inline BOOST_GPU_ENABLED typename tools::promote_args::type sinc_pi(T x, const Policy&) { typedef typename tools::promote_args::type result_type; return detail::sinc_pi_imp(static_cast(x)); @@ -92,7 +92,7 @@ namespace boost #ifndef BOOST_NO_TEMPLATE_TEMPLATES template class U> - inline U sinc_pi(const U x) + inline BOOST_GPU_ENABLED U sinc_pi(const U x) { BOOST_MATH_STD_USING using ::std::numeric_limits; @@ -133,7 +133,7 @@ namespace boost } template class U, class Policy> - inline U sinc_pi(const U x, const Policy&) + inline BOOST_GPU_ENABLED U sinc_pi(const U x, const Policy&) { return sinc_pi(x); } diff --git a/include/boost/math/special_functions/trunc.hpp b/include/boost/math/special_functions/trunc.hpp index 3f80c96fee..4adefaab35 100644 --- a/include/boost/math/special_functions/trunc.hpp +++ b/include/boost/math/special_functions/trunc.hpp @@ -18,7 +18,7 @@ namespace boost{ namespace math{ namespace detail{ template -inline typename tools::promote_args::type trunc(const T& v, const Policy& pol, const mpl::false_&) +inline BOOST_GPU_ENABLED typename tools::promote_args::type trunc(const T& v, const Policy& pol, const mpl::false_&) { BOOST_MATH_STD_USING typedef typename tools::promote_args::type result_type; @@ -28,7 +28,7 @@ inline typename tools::promote_args::type trunc(const T& v, const Policy& pol } template -inline typename 
tools::promote_args::type trunc(const T& v, const Policy&, const mpl::true_&) +inline BOOST_GPU_ENABLED typename tools::promote_args::type trunc(const T& v, const Policy&, const mpl::true_&) { return v; } @@ -36,12 +36,12 @@ inline typename tools::promote_args::type trunc(const T& v, const Policy&, co } template -inline typename tools::promote_args::type trunc(const T& v, const Policy& pol) +inline BOOST_GPU_ENABLED typename tools::promote_args::type trunc(const T& v, const Policy& pol) { return detail::trunc(v, pol, mpl::bool_::value>()); } template -inline typename tools::promote_args::type trunc(const T& v) +inline BOOST_GPU_ENABLED typename tools::promote_args::type trunc(const T& v) { return trunc(v, policies::policy<>()); } @@ -55,33 +55,41 @@ inline typename tools::promote_args::type trunc(const T& v) // dependent lookup. See our concept archetypes for examples. // template -inline int itrunc(const T& v, const Policy& pol) +inline BOOST_GPU_ENABLED int itrunc(const T& v, const Policy& pol) { BOOST_MATH_STD_USING typedef typename tools::promote_args::type result_type; result_type r = boost::math::trunc(v, pol); +#ifdef __CUDA_ARCH__ + if((r > INT_MAX) || (r < INT_MIN)) +#else if((r > (std::numeric_limits::max)()) || (r < (std::numeric_limits::min)())) +#endif return static_cast(policies::raise_rounding_error("boost::math::itrunc<%1%>(%1%)", 0, static_cast(v), 0, pol)); return static_cast(r); } template -inline int itrunc(const T& v) +inline BOOST_GPU_ENABLED int itrunc(const T& v) { return itrunc(v, policies::policy<>()); } template -inline long ltrunc(const T& v, const Policy& pol) +inline BOOST_GPU_ENABLED long ltrunc(const T& v, const Policy& pol) { BOOST_MATH_STD_USING typedef typename tools::promote_args::type result_type; result_type r = boost::math::trunc(v, pol); +#ifdef __CUDA_ARCH__ + if((r > LONG_MAX) || (r < LONG_MIN)) +#else if((r > (std::numeric_limits::max)()) || (r < (std::numeric_limits::min)())) +#endif return 
static_cast(policies::raise_rounding_error("boost::math::ltrunc<%1%>(%1%)", 0, static_cast(v), 0L, pol)); return static_cast(r); } template -inline long ltrunc(const T& v) +inline BOOST_GPU_ENABLED long ltrunc(const T& v) { return ltrunc(v, policies::policy<>()); } @@ -89,17 +97,21 @@ inline long ltrunc(const T& v) #ifdef BOOST_HAS_LONG_LONG template -inline boost::long_long_type lltrunc(const T& v, const Policy& pol) +inline BOOST_GPU_ENABLED boost::long_long_type lltrunc(const T& v, const Policy& pol) { BOOST_MATH_STD_USING typedef typename tools::promote_args::type result_type; result_type r = boost::math::trunc(v, pol); +#ifdef __CUDA_ARCH__ + if((r > LLONG_MAX) || (r < LLONG_MIN)) +#else if((r > (std::numeric_limits::max)()) || (r < (std::numeric_limits::min)())) +#endif return static_cast(policies::raise_rounding_error("boost::math::lltrunc<%1%>(%1%)", 0, v, static_cast(0), pol)); return static_cast(r); } template -inline boost::long_long_type lltrunc(const T& v) +inline BOOST_GPU_ENABLED boost::long_long_type lltrunc(const T& v) { return lltrunc(v, policies::policy<>()); } diff --git a/include/boost/math/tools/big_constant.hpp b/include/boost/math/tools/big_constant.hpp index a6f7bc0f1a..4f39bf07c3 100644 --- a/include/boost/math/tools/big_constant.hpp +++ b/include/boost/math/tools/big_constant.hpp @@ -62,6 +62,10 @@ inline BOOST_MATH_CONSTEXPR T make_big_value(largest_float, const char* s, mpl:: // // For constants which might fit in a long double (if it's big enough): // +#ifdef __CUDA_ARCH__ +// We don't support anything except float and double: +#define BOOST_MATH_BIG_CONSTANT(T, D, x) static_cast(x) +#else #define BOOST_MATH_BIG_CONSTANT(T, D, x)\ boost::math::tools::make_big_value(\ BOOST_MATH_LARGEST_FLOAT_C(x), \ @@ -72,6 +76,7 @@ inline BOOST_MATH_CONSTEXPR T make_big_value(largest_float, const char* s, mpl:: || (boost::math::tools::numeric_traits::is_specialized && \ (boost::math::tools::numeric_traits::digits10 <= 
boost::math::tools::numeric_traits::digits10))) >(), \ boost::is_convertible()) +#endif // // For constants too huge for any conceivable long double (and which generate compiler errors if we try and declare them as such): // diff --git a/include/boost/math/tools/config.hpp b/include/boost/math/tools/config.hpp index 17bfec16fe..3ce6b70655 100644 --- a/include/boost/math/tools/config.hpp +++ b/include/boost/math/tools/config.hpp @@ -265,6 +265,43 @@ # define BOOST_MATH_INT_VALUE_SUFFIX(RV, SUF) RV##SUF #endif // +// function scope static variables aren't allowed on CUDA device code: +// +#if defined(__CUDA_ARCH__) && defined(BOOST_NO_CXX11_CONSTEXPR) +#define BOOST_MATH_GPU_STATIC +#elif defined(__CUDA_ARCH__) +#define BOOST_MATH_GPU_STATIC constexpr +#else +#define BOOST_MATH_GPU_STATIC static +#endif +// +// Asserts don't work on CUDA either: +// +#ifdef __CUDA_ARCH__ +#define BOOST_MATH_ASSERT(x) +#else +#define BOOST_MATH_ASSERT(x) BOOST_ASSERT(x) +#endif +// +// And std lib functions swap/min/max aren't marked up as device safe: +// +#ifdef __CUDA_ARCH__ +template +inline BOOST_GPU_ENABLED void cuda_safe_swap(T& a, T& b) { T t(a); a = b; b = t; } +template +inline BOOST_GPU_ENABLED T cuda_safe_min(const T& a, const T& b) { return a < b ? a : b; } +template +inline BOOST_GPU_ENABLED T cuda_safe_max(const T& a, const T& b) { return a > b ? 
a : b; } + +# define BOOST_MATH_CUDA_SAFE_SWAP(a, b) cuda_safe_swap(a, b) +# define BOOST_MATH_CUDA_SAFE_MIN(a, b) cuda_safe_min(a, b) +# define BOOST_MATH_CUDA_SAFE_MAX(a, b) cuda_safe_max(a, b) +#else +# define BOOST_MATH_CUDA_SAFE_SWAP(a, b) swap(a, b) +# define BOOST_MATH_CUDA_SAFE_MIN(a, b) (std::min)(a, b) +# define BOOST_MATH_CUDA_SAFE_MAX(a, b) (std::max)(a, b) +#endif +// // And then the actual configuration: // #if defined(_GLIBCXX_USE_FLOAT128) && defined(BOOST_GCC) && !defined(__STRICT_ANSI__) \ diff --git a/include/boost/math/tools/detail/polynomial_horner1_10.hpp b/include/boost/math/tools/detail/polynomial_horner1_10.hpp index 48cf9e39b3..5cc32bfc23 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_10.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_10.hpp @@ -12,67 +12,67 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template 
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_11.hpp b/include/boost/math/tools/detail/polynomial_horner1_11.hpp index e52595429e..a54773aad4 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_11.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_11.hpp @@ -12,73 +12,73 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline 
BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_12.hpp b/include/boost/math/tools/detail/polynomial_horner1_12.hpp index 10cd178975..54bd8f755d 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_12.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_12.hpp @@ -12,79 +12,79 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) 
BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_13.hpp b/include/boost/math/tools/detail/polynomial_horner1_13.hpp index 90fa9ec43d..60d85776cc 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_13.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_13.hpp @@ -12,85 +12,85 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { return 
static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + 
a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_14.hpp b/include/boost/math/tools/detail/polynomial_horner1_14.hpp index 389c8063b0..8786979bcb 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_14.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_14.hpp @@ -12,91 +12,91 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const 
mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git 
a/include/boost/math/tools/detail/polynomial_horner1_15.hpp b/include/boost/math/tools/detail/polynomial_horner1_15.hpp index d196a41603..572562d051 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_15.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_15.hpp @@ -12,97 +12,97 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return 
static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git 
a/include/boost/math/tools/detail/polynomial_horner1_16.hpp b/include/boost/math/tools/detail/polynomial_horner1_16.hpp index fa48c6614c..5ddfe42eb7 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_16.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_16.hpp @@ -12,103 +12,103 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return 
static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_17.hpp b/include/boost/math/tools/detail/polynomial_horner1_17.hpp index 4575aeac7b..aef785ab7d 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_17.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_17.hpp @@ -12,109 +12,109 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const 
mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& 
x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template 
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_18.hpp b/include/boost/math/tools/detail/polynomial_horner1_18.hpp index 95dd400416..70ad76b306 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_18.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_18.hpp @@ -12,115 +12,115 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) 
BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, 
const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x 
+ a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((((((a[17] * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_19.hpp b/include/boost/math/tools/detail/polynomial_horner1_19.hpp index 6d73eb8ffc..eff44d8e1b 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_19.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_19.hpp @@ -12,121 +12,121 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + 
a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, 
const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const 
mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { 
return static_cast(((((((((((((((((a[17] * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((((((((a[18] * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_2.hpp b/include/boost/math/tools/detail/polynomial_horner1_2.hpp index 85640edf67..9502347742 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_2.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_2.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } diff --git 
a/include/boost/math/tools/detail/polynomial_horner1_20.hpp b/include/boost/math/tools/detail/polynomial_horner1_20.hpp index f8b7f34384..d5fe038f91 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_20.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_20.hpp @@ -12,127 +12,127 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return 
static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((((((a[17] * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((((((((a[18] * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, 
const mpl::int_<20>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<20>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((((((((((a[19] * x + a[18]) * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_3.hpp b/include/boost/math/tools/detail/polynomial_horner1_3.hpp index c0ad4660e9..4ae37d858c 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_3.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_3.hpp @@ -12,25 +12,25 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_4.hpp 
b/include/boost/math/tools/detail/polynomial_horner1_4.hpp index 27ad74eb95..8bb5cdf43b 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_4.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_4.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_5.hpp b/include/boost/math/tools/detail/polynomial_horner1_5.hpp index 5cfafb4052..8f8ec45a74 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_5.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_5.hpp @@ -12,37 +12,37 
@@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_6.hpp b/include/boost/math/tools/detail/polynomial_horner1_6.hpp index f7f4bee480..faf61aa84c 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_6.hpp +++ 
b/include/boost/math/tools/detail/polynomial_horner1_6.hpp @@ -12,43 +12,43 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_7.hpp b/include/boost/math/tools/detail/polynomial_horner1_7.hpp index c612822fc0..3102726e84 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_7.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_7.hpp @@ -12,49 +12,49 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const 
T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_8.hpp b/include/boost/math/tools/detail/polynomial_horner1_8.hpp index 5d9b453b9d..9a91bc4ce3 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_8.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_8.hpp @@ -12,55 +12,55 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const 
T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + 
a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_9.hpp b/include/boost/math/tools/detail/polynomial_horner1_9.hpp index 84977fd13b..d9f280f31c 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_9.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_9.hpp @@ -12,61 +12,61 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const 
mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner2_10.hpp b/include/boost/math/tools/detail/polynomial_horner2_10.hpp index f10c5d0492..2647f9d42e 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_10.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_10.hpp @@ -12,72 +12,72 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) 
+inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const 
T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); diff --git a/include/boost/math/tools/detail/polynomial_horner2_11.hpp b/include/boost/math/tools/detail/polynomial_horner2_11.hpp index 757357addf..0db7c32f48 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_11.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_11.hpp @@ -12,79 +12,79 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const 
mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); diff --git a/include/boost/math/tools/detail/polynomial_horner2_12.hpp b/include/boost/math/tools/detail/polynomial_horner2_12.hpp index 
e0fc8a9df7..429967a66f 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_12.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_12.hpp @@ -12,86 +12,86 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); diff --git a/include/boost/math/tools/detail/polynomial_horner2_13.hpp b/include/boost/math/tools/detail/polynomial_horner2_13.hpp index c13a0a7cc2..5b5a88efc5 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_13.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_13.hpp @@ -12,93 +12,93 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline 
BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline 
BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); diff --git 
a/include/boost/math/tools/detail/polynomial_horner2_14.hpp b/include/boost/math/tools/detail/polynomial_horner2_14.hpp index ccc6e7d8da..fca4cc8f4e 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_14.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_14.hpp @@ -12,100 +12,100 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = 
x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const 
T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); diff --git a/include/boost/math/tools/detail/polynomial_horner2_15.hpp b/include/boost/math/tools/detail/polynomial_horner2_15.hpp index a409ba7fdb..0de1a1f37b 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_15.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_15.hpp 
@@ -12,107 +12,107 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const 
mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) 
* x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); diff --git a/include/boost/math/tools/detail/polynomial_horner2_16.hpp 
b/include/boost/math/tools/detail/polynomial_horner2_16.hpp index a69c1a3619..8ff354a73a 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_16.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_16.hpp @@ -12,114 +12,114 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + 
(a[3] * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + 
a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); diff --git a/include/boost/math/tools/detail/polynomial_horner2_17.hpp b/include/boost/math/tools/detail/polynomial_horner2_17.hpp index 9e7c05d007..965601ef3b 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_17.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_17.hpp @@ -12,121 +12,121 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& 
x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, 
const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); diff --git a/include/boost/math/tools/detail/polynomial_horner2_18.hpp b/include/boost/math/tools/detail/polynomial_horner2_18.hpp index 
1ea0e052c0..4d16725d63 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_18.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_18.hpp @@ -12,128 +12,128 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + 
((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); diff --git a/include/boost/math/tools/detail/polynomial_horner2_19.hpp b/include/boost/math/tools/detail/polynomial_horner2_19.hpp index 45c125c871..21519449d8 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_19.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_19.hpp @@ -12,135 +12,135 @@ namespace boost{ namespace math{ namespace tools{ namespace 
detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * 
x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } 
template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, 
const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((((a[18] * x2 + a[16]) * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); diff --git a/include/boost/math/tools/detail/polynomial_horner2_2.hpp 
b/include/boost/math/tools/detail/polynomial_horner2_2.hpp index 20da483c33..188e80a397 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_2.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_2.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner2_20.hpp b/include/boost/math/tools/detail/polynomial_horner2_20.hpp index 956c6dba4f..0f098bc527 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_20.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_20.hpp @@ -12,142 
+12,142 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const 
mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) 
* x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((((((((a[18] * x2 + a[16]) * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<20>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<20>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((((((((a[19] * x2 + a[17]) * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((((a[18] * x2 + a[16]) * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); diff --git a/include/boost/math/tools/detail/polynomial_horner2_3.hpp b/include/boost/math/tools/detail/polynomial_horner2_3.hpp index 58b290c7c9..fb76d418d7 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_3.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_3.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template 
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner2_4.hpp b/include/boost/math/tools/detail/polynomial_horner2_4.hpp index 74f969b749..ea4f9a6439 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_4.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_4.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return 
static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner2_5.hpp b/include/boost/math/tools/detail/polynomial_horner2_5.hpp index 134cbc8ef9..0fd24c1311 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_5.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_5.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, 
const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); diff --git a/include/boost/math/tools/detail/polynomial_horner2_6.hpp b/include/boost/math/tools/detail/polynomial_horner2_6.hpp index 7cb75d75c8..47fb294db5 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_6.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_6.hpp @@ -12,44 +12,44 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); diff --git a/include/boost/math/tools/detail/polynomial_horner2_7.hpp b/include/boost/math/tools/detail/polynomial_horner2_7.hpp index 327639deb6..52b55d78c8 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_7.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_7.hpp @@ -12,51 +12,51 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return 
static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x); diff --git a/include/boost/math/tools/detail/polynomial_horner2_8.hpp b/include/boost/math/tools/detail/polynomial_horner2_8.hpp index 2145ad6dcd..4ef422b083 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_8.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_8.hpp @@ -12,58 +12,58 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return 
static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, 
const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); diff --git a/include/boost/math/tools/detail/polynomial_horner2_9.hpp b/include/boost/math/tools/detail/polynomial_horner2_9.hpp index 08f6336e49..c7e4ddc66b 100644 --- a/include/boost/math/tools/detail/polynomial_horner2_9.hpp +++ b/include/boost/math/tools/detail/polynomial_horner2_9.hpp @@ -12,65 +12,65 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + 
a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; return static_cast((((a[8] * x2 + 
a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x); diff --git a/include/boost/math/tools/detail/polynomial_horner3_10.hpp b/include/boost/math/tools/detail/polynomial_horner3_10.hpp index 05ad20cc30..ec744e7cfa 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_10.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_10.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline 
BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git 
a/include/boost/math/tools/detail/polynomial_horner3_11.hpp b/include/boost/math/tools/detail/polynomial_horner3_11.hpp index 9f8628e750..0d81fddd2f 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_11.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_11.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x 
* x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const 
mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_12.hpp b/include/boost/math/tools/detail/polynomial_horner3_12.hpp index ccb3ec4d10..61fae85f05 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_12.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_12.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) 
BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_13.hpp b/include/boost/math/tools/detail/polynomial_horner3_13.hpp index a8cdf2c508..464b874f5c 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_13.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_13.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const 
T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_14.hpp b/include/boost/math/tools/detail/polynomial_horner3_14.hpp index 5eb4ef1f28..d1f51b9f01 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_14.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_14.hpp @@ -12,37 
+12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, 
const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) } template 
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_15.hpp b/include/boost/math/tools/detail/polynomial_horner3_15.hpp index 7e8edcd8aa..95692b2499 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_15.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_15.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline 
BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* 
a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) 
} template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_16.hpp b/include/boost/math/tools/detail/polynomial_horner3_16.hpp index 58bea1581f..ce1dc45d0e 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_16.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_16.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const 
mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const 
T* a, const V& x, const mpl::int_<14>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_17.hpp b/include/boost/math/tools/detail/polynomial_horner3_17.hpp index 007b8a4918..81bc012f86 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_17.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_17.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 
+262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -330,7 +330,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_18.hpp b/include/boost/math/tools/detail/polynomial_horner3_18.hpp index 68935adba1..4bcfe2066f 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_18.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_18.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const 
mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; 
@@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -330,7 +330,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -367,7 +367,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_19.hpp b/include/boost/math/tools/detail/polynomial_horner3_19.hpp index acffba6b47..67f4a3dedc 100644 --- 
a/include/boost/math/tools/detail/polynomial_horner3_19.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_19.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) 
BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -330,7 +330,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -367,7 +367,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -406,7 +406,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_2.hpp b/include/boost/math/tools/detail/polynomial_horner3_2.hpp index 6b8d9a71f8..abf0326c81 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_2.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_2.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline 
BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner3_20.hpp b/include/boost/math/tools/detail/polynomial_horner3_20.hpp index b3a170c095..c49bab24c1 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_20.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_20.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const 
mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<10>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<11>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<12>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<13>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const 
mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<14>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<15>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -330,7 +330,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<16>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -367,7 +367,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<17>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -406,7 +406,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<18>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -447,7 +447,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<19>*) } template 
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<20>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<20>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_3.hpp b/include/boost/math/tools/detail/polynomial_horner3_3.hpp index 05fe88fccc..41a5588d0a 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_3.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_3.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * 
x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner3_4.hpp b/include/boost/math/tools/detail/polynomial_horner3_4.hpp index b98d6f678d..a437914475 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_4.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_4.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner3_5.hpp b/include/boost/math/tools/detail/polynomial_horner3_5.hpp index 12e639cef3..84a6ea87a1 100644 --- 
a/include/boost/math/tools/detail/polynomial_horner3_5.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_5.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_6.hpp b/include/boost/math/tools/detail/polynomial_horner3_6.hpp index 
b645cb5bbc..d925d3e53f 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_6.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_6.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template 
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_7.hpp b/include/boost/math/tools/detail/polynomial_horner3_7.hpp index 3df4b5b4ef..c4500e7e8e 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_7.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_7.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x 
+ a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_8.hpp b/include/boost/math/tools/detail/polynomial_horner3_8.hpp index 9a49d2555e..885d3955e2 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_8.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_8.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) 
+inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_9.hpp b/include/boost/math/tools/detail/polynomial_horner3_9.hpp index 3507d37604..efdf7af30b 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_9.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_9.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline 
BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<5>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<6>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<7>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<8>*) } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/rational_horner1_10.hpp b/include/boost/math/tools/detail/rational_horner1_10.hpp index e670853869..4ee20eb817 100644 --- a/include/boost/math/tools/detail/rational_horner1_10.hpp +++ b/include/boost/math/tools/detail/rational_horner1_10.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const 
T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ 
inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_11.hpp b/include/boost/math/tools/detail/rational_horner1_11.hpp index 65e17598ff..05fddffff6 100644 --- a/include/boost/math/tools/detail/rational_horner1_11.hpp +++ b/include/boost/math/tools/detail/rational_horner1_11.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) 
+inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) 
BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_12.hpp b/include/boost/math/tools/detail/rational_horner1_12.hpp index de33af0e07..632b0b110c 100644 --- a/include/boost/math/tools/detail/rational_horner1_12.hpp +++ b/include/boost/math/tools/detail/rational_horner1_12.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V 
evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ 
-108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const 
V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_13.hpp b/include/boost/math/tools/detail/rational_horner1_13.hpp index ed4ac1af8b..9004ff5bbe 100644 --- a/include/boost/math/tools/detail/rational_horner1_13.hpp +++ b/include/boost/math/tools/detail/rational_horner1_13.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template 
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { 
if(x <= 1) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) 
BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_14.hpp b/include/boost/math/tools/detail/rational_horner1_14.hpp index a3222f8212..1a712f8b4b 100644 --- a/include/boost/math/tools/detail/rational_horner1_14.hpp +++ b/include/boost/math/tools/detail/rational_horner1_14.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) 
BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { 
if(x <= 1) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * 
x + b[2]) * x + b[1]) * x + b[0])); @@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + 
b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_15.hpp b/include/boost/math/tools/detail/rational_horner1_15.hpp index c8cd691573..7efdb95720 100644 --- a/include/boost/math/tools/detail/rational_horner1_15.hpp +++ b/include/boost/math/tools/detail/rational_horner1_15.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return 
static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, 
const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const 
V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] 
* x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_16.hpp b/include/boost/math/tools/detail/rational_horner1_16.hpp index 8003c82db7..33b6360c00 100644 --- a/include/boost/math/tools/detail/rational_horner1_16.hpp +++ b/include/boost/math/tools/detail/rational_horner1_16.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: 
} template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ 
inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, 
const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, 
const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + 
a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_17.hpp b/include/boost/math/tools/detail/rational_horner1_17.hpp index 294c3f175d..811c9b0ee7 100644 --- a/include/boost/math/tools/detail/rational_horner1_17.hpp +++ b/include/boost/math/tools/detail/rational_horner1_17.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ 
inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x 
+ b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } 
template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -168,7 
+168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) 
* x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -204,7 +204,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((b[16] * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_18.hpp b/include/boost/math/tools/detail/rational_horner1_18.hpp index 1a48bba097..20954a66d7 100644 --- a/include/boost/math/tools/detail/rational_horner1_18.hpp +++ b/include/boost/math/tools/detail/rational_horner1_18.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, 
const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template 
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + 
a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + 
a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) 
BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -204,7 +204,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((b[16] * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + 
b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -216,7 +216,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((((((a[17] * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((((b[17] * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_19.hpp b/include/boost/math/tools/detail/rational_horner1_19.hpp index 12fd75cf1b..46439b6084 100644 --- a/include/boost/math/tools/detail/rational_horner1_19.hpp +++ b/include/boost/math/tools/detail/rational_horner1_19.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -156,7 +156,7 @@ 
inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + 
a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -204,7 +204,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((b[16] * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -216,7 +216,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template 
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((((((a[17] * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((((b[17] * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -228,7 +228,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((((((((a[18] * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((((b[18] * x + b[17]) * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_2.hpp b/include/boost/math/tools/detail/rational_horner1_2.hpp index c838f2a2cd..eb2da6caf5 100644 --- a/include/boost/math/tools/detail/rational_horner1_2.hpp +++ b/include/boost/math/tools/detail/rational_horner1_2.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ 
namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_20.hpp b/include/boost/math/tools/detail/rational_horner1_20.hpp index 3ee3e966e6..caece95794 100644 --- a/include/boost/math/tools/detail/rational_horner1_20.hpp +++ b/include/boost/math/tools/detail/rational_horner1_20.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline 
BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x 
<= 1) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, 
const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const 
U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / 
((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -204,7 +204,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((b[16] * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -216,7 +216,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, 
const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((((((((((((a[17] * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((((b[17] * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -228,7 +228,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((((((((((((a[18] * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((((b[18] * x + b[17]) * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -240,7 +240,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<20>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<20>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return 
static_cast((((((((((((((((((((a[19] * x + a[18]) * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((((((b[19] * x + b[18]) * x + b[17]) * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_3.hpp b/include/boost/math/tools/detail/rational_horner1_3.hpp index 034ead3f66..1290078e37 100644 --- a/include/boost/math/tools/detail/rational_horner1_3.hpp +++ b/include/boost/math/tools/detail/rational_horner1_3.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_4.hpp b/include/boost/math/tools/detail/rational_horner1_4.hpp index de2972f4c4..6e99d93d23 100644 --- a/include/boost/math/tools/detail/rational_horner1_4.hpp +++ b/include/boost/math/tools/detail/rational_horner1_4.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 
+48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_5.hpp b/include/boost/math/tools/detail/rational_horner1_5.hpp index a59ff114d1..ada4dc55a1 100644 --- a/include/boost/math/tools/detail/rational_horner1_5.hpp +++ b/include/boost/math/tools/detail/rational_horner1_5.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_6.hpp b/include/boost/math/tools/detail/rational_horner1_6.hpp index c5000c5db4..eca7f5e0f5 100644 --- a/include/boost/math/tools/detail/rational_horner1_6.hpp +++ b/include/boost/math/tools/detail/rational_horner1_6.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const 
V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_7.hpp b/include/boost/math/tools/detail/rational_horner1_7.hpp index bc860f3bf7..21dd256964 100644 --- a/include/boost/math/tools/detail/rational_horner1_7.hpp +++ b/include/boost/math/tools/detail/rational_horner1_7.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_8.hpp b/include/boost/math/tools/detail/rational_horner1_8.hpp index 69368978b6..012b3ab71c 100644 --- a/include/boost/math/tools/detail/rational_horner1_8.hpp +++ b/include/boost/math/tools/detail/rational_horner1_8.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / 
((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * 
x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_9.hpp b/include/boost/math/tools/detail/rational_horner1_9.hpp index 0aafea15fd..06991281f1 100644 --- a/include/boost/math/tools/detail/rational_horner1_9.hpp +++ b/include/boost/math/tools/detail/rational_horner1_9.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner2_10.hpp b/include/boost/math/tools/detail/rational_horner2_10.hpp index 127777bc2a..28fa196157 100644 --- a/include/boost/math/tools/detail/rational_horner2_10.hpp +++ 
b/include/boost/math/tools/detail/rational_horner2_10.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) 
BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_11.hpp 
b/include/boost/math/tools/detail/rational_horner2_11.hpp index 53983ed4ad..7f562d062d 100644 --- a/include/boost/math/tools/detail/rational_horner2_11.hpp +++ b/include/boost/math/tools/detail/rational_horner2_11.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_12.hpp b/include/boost/math/tools/detail/rational_horner2_12.hpp index 4ad0856b06..dbcec7003b 100644 --- a/include/boost/math/tools/detail/rational_horner2_12.hpp +++ b/include/boost/math/tools/detail/rational_horner2_12.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) 
/ ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_13.hpp b/include/boost/math/tools/detail/rational_horner2_13.hpp index 5f9303d0c3..a73935f666 100644 --- a/include/boost/math/tools/detail/rational_horner2_13.hpp +++ b/include/boost/math/tools/detail/rational_horner2_13.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, 
const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const 
V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_14.hpp b/include/boost/math/tools/detail/rational_horner2_14.hpp index dc512f393f..3b99df73d8 100644 --- a/include/boost/math/tools/detail/rational_horner2_14.hpp +++ b/include/boost/math/tools/detail/rational_horner2_14.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, 
const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, 
const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_15.hpp b/include/boost/math/tools/detail/rational_horner2_15.hpp index cf084e8be0..b9a0e0a5e6 100644 --- a/include/boost/math/tools/detail/rational_horner2_15.hpp +++ b/include/boost/math/tools/detail/rational_horner2_15.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, 
const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_16.hpp b/include/boost/math/tools/detail/rational_horner2_16.hpp index 3d13db2553..748fb127ac 100644 --- a/include/boost/math/tools/detail/rational_horner2_16.hpp +++ b/include/boost/math/tools/detail/rational_horner2_16.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, 
const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_17.hpp b/include/boost/math/tools/detail/rational_horner2_17.hpp index 3adf4053bd..f06f178975 100644 --- a/include/boost/math/tools/detail/rational_horner2_17.hpp +++ b/include/boost/math/tools/detail/rational_horner2_17.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, 
const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, 
const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -234,7 +234,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_18.hpp b/include/boost/math/tools/detail/rational_horner2_18.hpp index 607609fd8e..8f7c8f1f5d 100644 --- a/include/boost/math/tools/detail/rational_horner2_18.hpp +++ b/include/boost/math/tools/detail/rational_horner2_18.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, 
const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const 
V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -234,7 +234,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -250,7 +250,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_19.hpp b/include/boost/math/tools/detail/rational_horner2_19.hpp index bc324c3be3..ab515ab96b 100644 --- a/include/boost/math/tools/detail/rational_horner2_19.hpp +++ b/include/boost/math/tools/detail/rational_horner2_19.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) 
/ ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -234,7 +234,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -250,7 +250,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -266,7 +266,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_2.hpp 
b/include/boost/math/tools/detail/rational_horner2_2.hpp index 95ec0251d5..71c4ebae20 100644 --- a/include/boost/math/tools/detail/rational_horner2_2.hpp +++ b/include/boost/math/tools/detail/rational_horner2_2.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } diff --git a/include/boost/math/tools/detail/rational_horner2_20.hpp 
b/include/boost/math/tools/detail/rational_horner2_20.hpp index cf1211b61f..9914426a24 100644 --- a/include/boost/math/tools/detail/rational_horner2_20.hpp +++ b/include/boost/math/tools/detail/rational_horner2_20.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const 
U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -234,7 +234,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -250,7 +250,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -266,7 +266,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -282,7 +282,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<20>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<20>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_3.hpp b/include/boost/math/tools/detail/rational_horner2_3.hpp index 9ce437b41f..7b50dc4c40 100644 --- 
a/include/boost/math/tools/detail/rational_horner2_3.hpp +++ b/include/boost/math/tools/detail/rational_horner2_3.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } diff --git a/include/boost/math/tools/detail/rational_horner2_4.hpp b/include/boost/math/tools/detail/rational_horner2_4.hpp index 00543ede85..5e3105d680 100644 --- 
a/include/boost/math/tools/detail/rational_horner2_4.hpp +++ b/include/boost/math/tools/detail/rational_horner2_4.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } diff --git a/include/boost/math/tools/detail/rational_horner2_5.hpp b/include/boost/math/tools/detail/rational_horner2_5.hpp index d117b66633..d073e51168 100644 --- 
a/include/boost/math/tools/detail/rational_horner2_5.hpp +++ b/include/boost/math/tools/detail/rational_horner2_5.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, 
const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_6.hpp b/include/boost/math/tools/detail/rational_horner2_6.hpp index c431d16344..be7fb6ca7a 100644 --- a/include/boost/math/tools/detail/rational_horner2_6.hpp +++ b/include/boost/math/tools/detail/rational_horner2_6.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x 
+ a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_7.hpp b/include/boost/math/tools/detail/rational_horner2_7.hpp index 2104302472..c1ce46889d 100644 --- a/include/boost/math/tools/detail/rational_horner2_7.hpp +++ b/include/boost/math/tools/detail/rational_horner2_7.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_8.hpp b/include/boost/math/tools/detail/rational_horner2_8.hpp index fd98289b99..9133066891 100644 --- a/include/boost/math/tools/detail/rational_horner2_8.hpp +++ b/include/boost/math/tools/detail/rational_horner2_8.hpp @@ -12,37 
+12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner2_9.hpp b/include/boost/math/tools/detail/rational_horner2_9.hpp index 1081ab2f8b..286514ff4a 100644 --- a/include/boost/math/tools/detail/rational_horner2_9.hpp +++ b/include/boost/math/tools/detail/rational_horner2_9.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return 
static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* 
a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_10.hpp b/include/boost/math/tools/detail/rational_horner3_10.hpp index 7da05875f3..de2381d079 100644 --- a/include/boost/math/tools/detail/rational_horner3_10.hpp +++ b/include/boost/math/tools/detail/rational_horner3_10.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, 
const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_11.hpp b/include/boost/math/tools/detail/rational_horner3_11.hpp index df971197a3..e292111f04 100644 --- a/include/boost/math/tools/detail/rational_horner3_11.hpp +++ b/include/boost/math/tools/detail/rational_horner3_11.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -180,7 +180,7 @@ inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_12.hpp b/include/boost/math/tools/detail/rational_horner3_12.hpp index 668f76684f..d2203a10b1 100644 --- a/include/boost/math/tools/detail/rational_horner3_12.hpp +++ b/include/boost/math/tools/detail/rational_horner3_12.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -476,7 
+476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_13.hpp b/include/boost/math/tools/detail/rational_horner3_13.hpp index b0b4c2ac58..0bfbb3a4db 100644 --- a/include/boost/math/tools/detail/rational_horner3_13.hpp +++ b/include/boost/math/tools/detail/rational_horner3_13.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_14.hpp b/include/boost/math/tools/detail/rational_horner3_14.hpp index 92035ef806..4af1fa7dde 100644 --- a/include/boost/math/tools/detail/rational_horner3_14.hpp +++ b/include/boost/math/tools/detail/rational_horner3_14.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ 
template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_15.hpp b/include/boost/math/tools/detail/rational_horner3_15.hpp index 9536ecd844..3c88bbd3e4 100644 --- a/include/boost/math/tools/detail/rational_horner3_15.hpp +++ b/include/boost/math/tools/detail/rational_horner3_15.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED 
V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_16.hpp b/include/boost/math/tools/detail/rational_horner3_16.hpp index 7ccf8f6e7a..bb02b5cb97 100644 --- a/include/boost/math/tools/detail/rational_horner3_16.hpp +++ b/include/boost/math/tools/detail/rational_horner3_16.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED 
V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_17.hpp b/include/boost/math/tools/detail/rational_horner3_17.hpp index 1a35c47397..167bfd5904 100644 --- a/include/boost/math/tools/detail/rational_horner3_17.hpp +++ b/include/boost/math/tools/detail/rational_horner3_17.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ 
template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ 
-1026,7 +1026,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_18.hpp b/include/boost/math/tools/detail/rational_horner3_18.hpp index 8a1c16eb2f..79b92a87e6 100644 --- a/include/boost/math/tools/detail/rational_horner3_18.hpp +++ b/include/boost/math/tools/detail/rational_horner3_18.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline 
V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ 
-782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -1026,7 +1026,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -1160,7 +1160,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_19.hpp b/include/boost/math/tools/detail/rational_horner3_19.hpp index 15d16bafc3..2403b77c2c 100644 --- a/include/boost/math/tools/detail/rational_horner3_19.hpp +++ b/include/boost/math/tools/detail/rational_horner3_19.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) 
+inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline 
BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x 
<= 1) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -1026,7 +1026,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template 
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -1160,7 +1160,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -1302,7 +1302,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_2.hpp b/include/boost/math/tools/detail/rational_horner3_2.hpp index 95ec0251d5..71c4ebae20 100644 --- a/include/boost/math/tools/detail/rational_horner3_2.hpp +++ b/include/boost/math/tools/detail/rational_horner3_2.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } diff --git a/include/boost/math/tools/detail/rational_horner3_20.hpp b/include/boost/math/tools/detail/rational_horner3_20.hpp index 78233214d8..a4fab5745d 100644 --- a/include/boost/math/tools/detail/rational_horner3_20.hpp +++ b/include/boost/math/tools/detail/rational_horner3_20.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) 
BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<10>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<11>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<12>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, 
const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<13>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<14>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<15>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<16>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -1026,7 +1026,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<17>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -1160,7 +1160,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<18>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -1302,7 +1302,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<19>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -1452,7 +1452,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<20>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<20>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_3.hpp b/include/boost/math/tools/detail/rational_horner3_3.hpp index 9ce437b41f..7b50dc4c40 100644 --- a/include/boost/math/tools/detail/rational_horner3_3.hpp +++ b/include/boost/math/tools/detail/rational_horner3_3.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline 
BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } diff --git a/include/boost/math/tools/detail/rational_horner3_4.hpp b/include/boost/math/tools/detail/rational_horner3_4.hpp index 00543ede85..5e3105d680 100644 --- a/include/boost/math/tools/detail/rational_horner3_4.hpp +++ b/include/boost/math/tools/detail/rational_horner3_4.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } diff --git a/include/boost/math/tools/detail/rational_horner3_5.hpp b/include/boost/math/tools/detail/rational_horner3_5.hpp index 35dce45a80..f5138c06aa 100644 --- a/include/boost/math/tools/detail/rational_horner3_5.hpp +++ b/include/boost/math/tools/detail/rational_horner3_5.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_6.hpp b/include/boost/math/tools/detail/rational_horner3_6.hpp index b9361ba07a..a1f7199828 100644 --- a/include/boost/math/tools/detail/rational_horner3_6.hpp +++ b/include/boost/math/tools/detail/rational_horner3_6.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, 
const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_7.hpp b/include/boost/math/tools/detail/rational_horner3_7.hpp index 92b00b3a15..4c5487ecba 100644 --- a/include/boost/math/tools/detail/rational_horner3_7.hpp +++ 
b/include/boost/math/tools/detail/rational_horner3_7.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) 
BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_8.hpp b/include/boost/math/tools/detail/rational_horner3_8.hpp index 197b6c0550..f1f0710ac0 100644 --- a/include/boost/math/tools/detail/rational_horner3_8.hpp +++ b/include/boost/math/tools/detail/rational_horner3_8.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return 
static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) 
BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/detail/rational_horner3_9.hpp b/include/boost/math/tools/detail/rational_horner3_9.hpp index 5aad957c37..695fc2d1d1 100644 --- a/include/boost/math/tools/detail/rational_horner3_9.hpp +++ b/include/boost/math/tools/detail/rational_horner3_9.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T*, const U*, const V&, const mpl::int_<0>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V&, const mpl::int_<1>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<2>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<3>*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
mpl::int_<4>*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<5>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<6>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<7>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<8>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const mpl::int_<9>*) BOOST_MATH_NOEXCEPT(V) { if(x <= 1) { diff --git a/include/boost/math/tools/fraction.hpp 
b/include/boost/math/tools/fraction.hpp index a787c603f3..a91f8d3009 100644 --- a/include/boost/math/tools/fraction.hpp +++ b/include/boost/math/tools/fraction.hpp @@ -15,6 +15,7 @@ #include #include #include +#include namespace boost{ namespace math{ namespace tools{ @@ -27,17 +28,22 @@ namespace detail template struct is_pair > : public boost::true_type{}; +#ifdef __CUDA_ARCH__ + template + struct is_pair > : public boost::true_type {}; +#endif + template struct fraction_traits_simple { typedef typename Gen::result_type result_type; typedef typename Gen::result_type value_type; - static result_type a(const value_type&) BOOST_MATH_NOEXCEPT(value_type) + static BOOST_GPU_ENABLED result_type a(const value_type&) BOOST_MATH_NOEXCEPT(value_type) { return 1; } - static result_type b(const value_type& v) BOOST_MATH_NOEXCEPT(value_type) + static BOOST_GPU_ENABLED result_type b(const value_type& v) BOOST_MATH_NOEXCEPT(value_type) { return v; } @@ -49,11 +55,11 @@ namespace detail typedef typename Gen::result_type value_type; typedef typename value_type::first_type result_type; - static result_type a(const value_type& v) BOOST_MATH_NOEXCEPT(value_type) + static BOOST_GPU_ENABLED result_type a(const value_type& v) BOOST_MATH_NOEXCEPT(value_type) { return v.first; } - static result_type b(const value_type& v) BOOST_MATH_NOEXCEPT(value_type) + static BOOST_GPU_ENABLED result_type b(const value_type& v) BOOST_MATH_NOEXCEPT(value_type) { return v.second; } @@ -85,7 +91,7 @@ namespace detail // Note that the first a0 returned by generator Gen is disarded. 
// template -inline typename detail::fraction_traits::result_type continued_fraction_b(Gen& g, const U& factor, boost::uintmax_t& max_terms) +inline BOOST_GPU_ENABLED typename detail::fraction_traits::result_type continued_fraction_b(Gen& g, const U& factor, boost::uintmax_t& max_terms) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits::result_type) && noexcept(std::declval()())) { BOOST_MATH_STD_USING // ADL of std names @@ -126,7 +132,7 @@ inline typename detail::fraction_traits::result_type continued_fraction_b(G } template -inline typename detail::fraction_traits::result_type continued_fraction_b(Gen& g, const U& factor) +inline BOOST_GPU_ENABLED typename detail::fraction_traits::result_type continued_fraction_b(Gen& g, const U& factor) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits::result_type) && noexcept(std::declval()())) { boost::uintmax_t max_terms = (std::numeric_limits::max)(); @@ -134,7 +140,7 @@ inline typename detail::fraction_traits::result_type continued_fraction_b(G } template -inline typename detail::fraction_traits::result_type continued_fraction_b(Gen& g, int bits) +inline BOOST_GPU_ENABLED typename detail::fraction_traits::result_type continued_fraction_b(Gen& g, int bits) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits::result_type) && noexcept(std::declval()())) { BOOST_MATH_STD_USING // ADL of std names @@ -148,7 +154,7 @@ inline typename detail::fraction_traits::result_type continued_fraction_b(G } template -inline typename detail::fraction_traits::result_type continued_fraction_b(Gen& g, int bits, boost::uintmax_t& max_terms) +inline BOOST_GPU_ENABLED typename detail::fraction_traits::result_type continued_fraction_b(Gen& g, int bits, boost::uintmax_t& max_terms) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits::result_type) && noexcept(std::declval()())) { BOOST_MATH_STD_USING // ADL of std names @@ -175,7 +181,7 @@ inline typename 
detail::fraction_traits::result_type continued_fraction_b(G // Note that the first a1 and b1 returned by generator Gen are both used. // template -inline typename detail::fraction_traits::result_type continued_fraction_a(Gen& g, const U& factor, boost::uintmax_t& max_terms) +inline BOOST_GPU_ENABLED typename detail::fraction_traits::result_type continued_fraction_a(Gen& g, const U& factor, boost::uintmax_t& max_terms) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits::result_type) && noexcept(std::declval()())) { BOOST_MATH_STD_USING // ADL of std names @@ -217,15 +223,19 @@ inline typename detail::fraction_traits::result_type continued_fraction_a(G } template -inline typename detail::fraction_traits::result_type continued_fraction_a(Gen& g, const U& factor) +inline BOOST_GPU_ENABLED typename detail::fraction_traits::result_type continued_fraction_a(Gen& g, const U& factor) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits::result_type) && noexcept(std::declval()())) { +#ifdef __CUDA_ARCH__ + boost::uintmax_t max_iter = ULONG_MAX; +#else boost::uintmax_t max_iter = (std::numeric_limits::max)(); +#endif return continued_fraction_a(g, factor, max_iter); } template -inline typename detail::fraction_traits::result_type continued_fraction_a(Gen& g, int bits) +inline BOOST_GPU_ENABLED typename detail::fraction_traits::result_type continued_fraction_a(Gen& g, int bits) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits::result_type) && noexcept(std::declval()())) { BOOST_MATH_STD_USING // ADL of std names @@ -240,7 +250,7 @@ inline typename detail::fraction_traits::result_type continued_fraction_a(G } template -inline typename detail::fraction_traits::result_type continued_fraction_a(Gen& g, int bits, boost::uintmax_t& max_terms) +inline BOOST_GPU_ENABLED typename detail::fraction_traits::result_type continued_fraction_a(Gen& g, int bits, boost::uintmax_t& max_terms) 
BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits::result_type) && noexcept(std::declval()())) { BOOST_MATH_STD_USING // ADL of std names diff --git a/include/boost/math/tools/polynomial.hpp b/include/boost/math/tools/polynomial.hpp index 1f4b1f6fe6..95365a0bac 100644 --- a/include/boost/math/tools/polynomial.hpp +++ b/include/boost/math/tools/polynomial.hpp @@ -47,7 +47,7 @@ T chebyshev_coefficient(unsigned n, unsigned m) unsigned r = n - m; r /= 2; - BOOST_ASSERT(n - 2 * r == m); + BOOST_MATH_ASSERT(n - 2 * r == m); if(r & 1) result = -result; @@ -192,9 +192,9 @@ template std::pair< polynomial, polynomial > division(polynomial u, const polynomial& v) { - BOOST_ASSERT(v.size() <= u.size()); - BOOST_ASSERT(v); - BOOST_ASSERT(u); + BOOST_MATH_ASSERT(v.size() <= u.size()); + BOOST_MATH_ASSERT(v); + BOOST_MATH_ASSERT(u); typedef typename polynomial::size_type N; @@ -277,7 +277,7 @@ template std::pair< polynomial, polynomial > quotient_remainder(const polynomial& dividend, const polynomial& divisor) { - BOOST_ASSERT(divisor); + BOOST_MATH_ASSERT(divisor); if (dividend.size() < divisor.size()) return std::make_pair(polynomial(), dividend); return detail::division(dividend, divisor); diff --git a/include/boost/math/tools/precision.hpp b/include/boost/math/tools/precision.hpp index 6538083b99..bda9932b79 100644 --- a/include/boost/math/tools/precision.hpp +++ b/include/boost/math/tools/precision.hpp @@ -24,58 +24,80 @@ namespace boost{ namespace math { -namespace tools -{ -// If T is not specialized, the functions digits, max_value and min_value, -// all get synthesised automatically from std::numeric_limits. -// However, if numeric_limits is not specialised for type RealType, -// for example with NTL::RR type, then you will get a compiler error -// when code tries to use these functions, unless you explicitly specialise them. 
- -// For example if the precision of RealType varies at runtime, -// then numeric_limits support may not be appropriate, -// see boost/math/tools/ntl.hpp for examples like -// template <> NTL::RR max_value ... -// See Conceptual Requirements for Real Number Types. - -template -inline BOOST_MATH_CONSTEXPR int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) BOOST_NOEXCEPT -{ + namespace tools + { + // If T is not specialized, the functions digits, max_value and min_value, + // all get synthesised automatically from std::numeric_limits. + // However, if numeric_limits is not specialised for type RealType, + // for example with NTL::RR type, then you will get a compiler error + // when code tries to use these functions, unless you explicitly specialise them. + + // For example if the precision of RealType varies at runtime, + // then numeric_limits support may not be appropriate, + // see boost/math/tools/ntl.hpp for examples like + // template <> NTL::RR max_value ... + // See Conceptual Requirements for Real Number Types. + + template + inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) BOOST_NOEXCEPT + { #ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS - BOOST_STATIC_ASSERT( ::std::numeric_limits::is_specialized); - BOOST_STATIC_ASSERT( ::std::numeric_limits::radix == 2 || ::std::numeric_limits::radix == 10); + BOOST_STATIC_ASSERT(::std::numeric_limits::is_specialized); + BOOST_STATIC_ASSERT(::std::numeric_limits::radix == 2 || ::std::numeric_limits::radix == 10); #else - BOOST_ASSERT(::std::numeric_limits::is_specialized); - BOOST_ASSERT(::std::numeric_limits::radix == 2 || ::std::numeric_limits::radix == 10); + BOOST_MATH_ASSERT(::std::numeric_limits::is_specialized); + BOOST_MATH_ASSERT(::std::numeric_limits::radix == 2 || ::std::numeric_limits::radix == 10); #endif - return std::numeric_limits::radix == 2 - ? 
std::numeric_limits::digits - : ((std::numeric_limits::digits + 1) * 1000L) / 301L; -} - -template -inline BOOST_MATH_CONSTEXPR T max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) -{ + return std::numeric_limits::radix == 2 + ? std::numeric_limits::digits + : ((std::numeric_limits::digits + 1) * 1000L) / 301L; + } + + template + inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) + { #ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS - BOOST_STATIC_ASSERT( ::std::numeric_limits::is_specialized); + BOOST_STATIC_ASSERT(::std::numeric_limits::is_specialized); #else - BOOST_ASSERT(::std::numeric_limits::is_specialized); + BOOST_MATH_ASSERT(::std::numeric_limits::is_specialized); +#endif + return (std::numeric_limits::max)(); + } // Also used as a finite 'infinite' value for - and +infinity, for example: + // -max_value = -1.79769e+308, max_value = 1.79769e+308. + +#ifdef __CUDA_ARCH__ + template <> inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED float max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(float)) BOOST_MATH_NOEXCEPT(float) + { + return FLT_MAX; + } + template <> inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED double max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(double)) BOOST_MATH_NOEXCEPT(double) + { + return DBL_MAX; + } #endif - return (std::numeric_limits::max)(); -} // Also used as a finite 'infinite' value for - and +infinity, for example: -// -max_value = -1.79769e+308, max_value = 1.79769e+308. 
template -inline BOOST_MATH_CONSTEXPR T min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) { #ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS BOOST_STATIC_ASSERT( ::std::numeric_limits::is_specialized); #else - BOOST_ASSERT(::std::numeric_limits::is_specialized); + BOOST_MATH_ASSERT(::std::numeric_limits::is_specialized); #endif return (std::numeric_limits::min)(); } +#ifdef __CUDA_ARCH__ +template <> inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED float min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(float)) BOOST_MATH_NOEXCEPT(float) +{ + return FLT_MIN; +} +template <> inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED double min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(double)) BOOST_MATH_NOEXCEPT(double) +{ + return DBL_MIN; +} +#endif + namespace detail{ // // Logarithmic limits come next, note that although @@ -86,13 +108,13 @@ namespace detail{ // For type float first: // template -inline BOOST_MATH_CONSTEXPR T log_max_value(const mpl::int_<128>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T log_max_value(const mpl::int_<128>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) { return 88.0f; } template -inline BOOST_MATH_CONSTEXPR T log_min_value(const mpl::int_<128>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T log_min_value(const mpl::int_<128>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) { return -87.0f; } @@ -100,13 +122,13 @@ inline BOOST_MATH_CONSTEXPR T log_min_value(const mpl::int_<128>& BOOST_MATH_APP // Now double: // template -inline BOOST_MATH_CONSTEXPR T log_max_value(const mpl::int_<1024>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T log_max_value(const 
mpl::int_<1024>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) { return 709.0; } template -inline BOOST_MATH_CONSTEXPR T log_min_value(const mpl::int_<1024>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T log_min_value(const mpl::int_<1024>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) { return -708.0; } @@ -114,19 +136,19 @@ inline BOOST_MATH_CONSTEXPR T log_min_value(const mpl::int_<1024>& BOOST_MATH_AP // 80 and 128-bit long doubles: // template -inline BOOST_MATH_CONSTEXPR T log_max_value(const mpl::int_<16384>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T log_max_value(const mpl::int_<16384>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) { return 11356.0L; } template -inline BOOST_MATH_CONSTEXPR T log_min_value(const mpl::int_<16384>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T log_min_value(const mpl::int_<16384>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) { return -11355.0L; } template -inline T log_max_value(const mpl::int_<0>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) +inline BOOST_GPU_ENABLED T log_max_value(const mpl::int_<0>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) { BOOST_MATH_STD_USING #ifdef __SUNPRO_CC @@ -139,7 +161,7 @@ inline T log_max_value(const mpl::int_<0>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_T } template -inline T log_min_value(const mpl::int_<0>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) +inline BOOST_GPU_ENABLED T log_min_value(const mpl::int_<0>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) { BOOST_MATH_STD_USING #ifdef __SUNPRO_CC @@ -152,14 +174,25 @@ inline T log_min_value(const mpl::int_<0>& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_T } template -inline BOOST_MATH_CONSTEXPR T epsilon(const mpl::true_& 
BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T epsilon(const mpl::true_& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_MATH_NOEXCEPT(T) { return std::numeric_limits::epsilon(); } +#ifdef __CUDA_ARCH__ +template <> inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED float epsilon(const mpl::true_& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(float)) BOOST_MATH_NOEXCEPT(float) +{ + return FLT_EPSILON; +} +template <> inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED double epsilon(const mpl::true_& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(double)) BOOST_MATH_NOEXCEPT(double) +{ + return DBL_EPSILON; +} +#endif + #if defined(__GNUC__) && ((LDBL_MANT_DIG == 106) || (__LDBL_MANT_DIG__ == 106)) template <> -inline BOOST_MATH_CONSTEXPR long double epsilon(const mpl::true_& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(long double)) BOOST_MATH_NOEXCEPT(long double) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED long double epsilon(const mpl::true_& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(long double)) BOOST_MATH_NOEXCEPT(long double) { // numeric_limits on Darwin (and elsewhere) tells lies here: // the issue is that long double on a few platforms is @@ -178,7 +211,7 @@ inline BOOST_MATH_CONSTEXPR long double epsilon(const mpl::true_& B #endif template -inline T epsilon(const mpl::false_& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) +inline BOOST_GPU_ENABLED T epsilon(const mpl::false_& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) { // Note: don't cache result as precision may vary at runtime: BOOST_MATH_STD_USING // for ADL of std names @@ -214,12 +247,12 @@ struct log_limit_noexcept_traits : public log_limit_noexcept_traits_imp -inline BOOST_MATH_CONSTEXPR T log_max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_NOEXCEPT_IF(detail::log_limit_noexcept_traits::value) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T log_max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) 
BOOST_NOEXCEPT_IF(detail::log_limit_noexcept_traits::value) { #ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS return detail::log_max_value(typename detail::log_limit_traits::tag_type()); #else - BOOST_ASSERT(::std::numeric_limits::is_specialized); + BOOST_MATH_ASSERT(::std::numeric_limits::is_specialized); BOOST_MATH_STD_USING static const T val = log((std::numeric_limits::max)()); return val; @@ -227,12 +260,12 @@ inline BOOST_MATH_CONSTEXPR T log_max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T) } template -inline BOOST_MATH_CONSTEXPR T log_min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_NOEXCEPT_IF(detail::log_limit_noexcept_traits::value) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T log_min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) BOOST_NOEXCEPT_IF(detail::log_limit_noexcept_traits::value) { #ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS return detail::log_min_value(typename detail::log_limit_traits::tag_type()); #else - BOOST_ASSERT(::std::numeric_limits::is_specialized); + BOOST_MATH_ASSERT(::std::numeric_limits::is_specialized); BOOST_MATH_STD_USING static const T val = log((std::numeric_limits::min)()); return val; @@ -244,7 +277,7 @@ inline BOOST_MATH_CONSTEXPR T log_min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T) #endif template -inline BOOST_MATH_CONSTEXPR T epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) BOOST_MATH_NOEXCEPT(T) { #ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS return detail::epsilon(mpl::bool_< ::std::numeric_limits::is_specialized>()); @@ -258,31 +291,31 @@ inline BOOST_MATH_CONSTEXPR T epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) namespace detail{ template -inline BOOST_MATH_CONSTEXPR T root_epsilon_imp(const mpl::int_<24>&) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T root_epsilon_imp(const T*, const mpl::int_<24>&) BOOST_MATH_NOEXCEPT(T) { return 
static_cast(0.00034526698300124390839884978618400831996329879769945L); } template -inline BOOST_MATH_CONSTEXPR T root_epsilon_imp(const T*, const mpl::int_<53>&) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T root_epsilon_imp(const T*, const mpl::int_<53>&) BOOST_MATH_NOEXCEPT(T) { return static_cast(0.1490116119384765625e-7L); } template -inline BOOST_MATH_CONSTEXPR T root_epsilon_imp(const T*, const mpl::int_<64>&) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T root_epsilon_imp(const T*, const mpl::int_<64>&) BOOST_MATH_NOEXCEPT(T) { return static_cast(0.32927225399135962333569506281281311031656150598474e-9L); } template -inline BOOST_MATH_CONSTEXPR T root_epsilon_imp(const T*, const mpl::int_<113>&) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T root_epsilon_imp(const T*, const mpl::int_<113>&) BOOST_MATH_NOEXCEPT(T) { return static_cast(0.1387778780781445675529539585113525390625e-16L); } template -inline T root_epsilon_imp(const T*, const Tag&) +inline BOOST_GPU_ENABLED T root_epsilon_imp(const T*, const Tag&) { BOOST_MATH_STD_USING static const T r_eps = sqrt(tools::epsilon()); @@ -297,31 +330,31 @@ inline T root_epsilon_imp(const T*, const mpl::int_<0>&) } template -inline BOOST_MATH_CONSTEXPR T cbrt_epsilon_imp(const mpl::int_<24>&) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T cbrt_epsilon_imp(const mpl::int_<24>&) BOOST_MATH_NOEXCEPT(T) { return static_cast(0.0049215666011518482998719164346805794944150447839903L); } template -inline BOOST_MATH_CONSTEXPR T cbrt_epsilon_imp(const T*, const mpl::int_<53>&) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T cbrt_epsilon_imp(const T*, const mpl::int_<53>&) BOOST_MATH_NOEXCEPT(T) { return static_cast(6.05545445239333906078989272793696693569753008995e-6L); } template -inline BOOST_MATH_CONSTEXPR T cbrt_epsilon_imp(const T*, const mpl::int_<64>&) BOOST_MATH_NOEXCEPT(T) +inline 
BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T cbrt_epsilon_imp(const T*, const mpl::int_<64>&) BOOST_MATH_NOEXCEPT(T) { return static_cast(4.76837158203125e-7L); } template -inline BOOST_MATH_CONSTEXPR T cbrt_epsilon_imp(const T*, const mpl::int_<113>&) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T cbrt_epsilon_imp(const T*, const mpl::int_<113>&) BOOST_MATH_NOEXCEPT(T) { return static_cast(5.7749313854154005630396773604745549542403508090496e-12L); } template -inline T cbrt_epsilon_imp(const T*, const Tag&) +inline BOOST_GPU_ENABLED T cbrt_epsilon_imp(const T*, const Tag&) { BOOST_MATH_STD_USING; static const T cbrt_eps = pow(tools::epsilon(), T(1) / 3); @@ -336,31 +369,31 @@ inline T cbrt_epsilon_imp(const T*, const mpl::int_<0>&) } template -inline BOOST_MATH_CONSTEXPR T forth_root_epsilon_imp(const T*, const mpl::int_<24>&) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T forth_root_epsilon_imp(const T*, const mpl::int_<24>&) BOOST_MATH_NOEXCEPT(T) { return static_cast(0.018581361171917516667460937040007436176452688944747L); } template -inline BOOST_MATH_CONSTEXPR T forth_root_epsilon_imp(const T*, const mpl::int_<53>&) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T forth_root_epsilon_imp(const T*, const mpl::int_<53>&) BOOST_MATH_NOEXCEPT(T) { return static_cast(0.0001220703125L); } template -inline BOOST_MATH_CONSTEXPR T forth_root_epsilon_imp(const T*, const mpl::int_<64>&) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T forth_root_epsilon_imp(const T*, const mpl::int_<64>&) BOOST_MATH_NOEXCEPT(T) { return static_cast(0.18145860519450699870567321328132261891067079047605e-4L); } template -inline BOOST_MATH_CONSTEXPR T forth_root_epsilon_imp(const T*, const mpl::int_<113>&) BOOST_MATH_NOEXCEPT(T) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T forth_root_epsilon_imp(const T*, const mpl::int_<113>&) BOOST_MATH_NOEXCEPT(T) { return 
static_cast(0.37252902984619140625e-8L); } template -inline T forth_root_epsilon_imp(const T*, const Tag&) +inline BOOST_GPU_ENABLED T forth_root_epsilon_imp(const T*, const Tag&) { BOOST_MATH_STD_USING static const T r_eps = sqrt(sqrt(tools::epsilon())); @@ -384,19 +417,19 @@ struct root_epsilon_traits } template -inline BOOST_MATH_CONSTEXPR T root_epsilon() BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(T) && detail::root_epsilon_traits::has_noexcept) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T root_epsilon() BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(T) && detail::root_epsilon_traits::has_noexcept) { return detail::root_epsilon_imp(static_cast(0), typename detail::root_epsilon_traits::tag_type()); } template -inline BOOST_MATH_CONSTEXPR T cbrt_epsilon() BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(T) && detail::root_epsilon_traits::has_noexcept) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T cbrt_epsilon() BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(T) && detail::root_epsilon_traits::has_noexcept) { return detail::cbrt_epsilon_imp(static_cast(0), typename detail::root_epsilon_traits::tag_type()); } template -inline BOOST_MATH_CONSTEXPR T forth_root_epsilon() BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(T) && detail::root_epsilon_traits::has_noexcept) +inline BOOST_MATH_CONSTEXPR BOOST_GPU_ENABLED T forth_root_epsilon() BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(T) && detail::root_epsilon_traits::has_noexcept) { return detail::forth_root_epsilon_imp(static_cast(0), typename detail::root_epsilon_traits::tag_type()); } diff --git a/include/boost/math/tools/rational.hpp b/include/boost/math/tools/rational.hpp index d8bd4a73aa..4a31c7959e 100644 --- a/include/boost/math/tools/rational.hpp +++ b/include/boost/math/tools/rational.hpp @@ -168,12 +168,12 @@ namespace boost{ namespace math{ namespace tools{ // Forward declaration to keep two phase lookup happy: // template -U evaluate_polynomial(const T* poly, U const& z, std::size_t count) BOOST_MATH_NOEXCEPT(U); +BOOST_GPU_ENABLED U 
evaluate_polynomial(const T* poly, U const& z, std::size_t count) BOOST_MATH_NOEXCEPT(U); namespace detail{ template -inline V evaluate_polynomial_c_imp(const T* a, const V& val, const Tag*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial_c_imp(const T* a, const V& val, const Tag*) BOOST_MATH_NOEXCEPT(V) { return evaluate_polynomial(a, val, Tag::value); } @@ -186,9 +186,9 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& val, const Tag*) BOOST_M // the loop expanded versions above: // template -inline U evaluate_polynomial(const T* poly, U const& z, std::size_t count) BOOST_MATH_NOEXCEPT(U) +inline BOOST_GPU_ENABLED U evaluate_polynomial(const T* poly, U const& z, std::size_t count) BOOST_MATH_NOEXCEPT(U) { - BOOST_ASSERT(count > 0); + BOOST_MATH_ASSERT(count > 0); U sum = static_cast(poly[count - 1]); for(int i = static_cast(count) - 2; i >= 0; --i) { @@ -202,14 +202,14 @@ inline U evaluate_polynomial(const T* poly, U const& z, std::size_t count) BOOST // implementations above: // template -inline V evaluate_polynomial(const T(&a)[N], const V& val) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial(const T(&a)[N], const V& val) BOOST_MATH_NOEXCEPT(V) { typedef mpl::int_ tag_type; return detail::evaluate_polynomial_c_imp(static_cast(a), val, static_cast(0)); } template -inline V evaluate_polynomial(const boost::array& a, const V& val) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_polynomial(const boost::array& a, const V& val) BOOST_MATH_NOEXCEPT(V) { typedef mpl::int_ tag_type; return detail::evaluate_polynomial_c_imp(static_cast(a.data()), val, static_cast(0)); @@ -218,19 +218,19 @@ inline V evaluate_polynomial(const boost::array& a, const V& val) BOOST_MAT // Even polynomials are trivial: just square the argument! 
// template -inline U evaluate_even_polynomial(const T* poly, U z, std::size_t count) BOOST_MATH_NOEXCEPT(U) +inline BOOST_GPU_ENABLED U evaluate_even_polynomial(const T* poly, U z, std::size_t count) BOOST_MATH_NOEXCEPT(U) { return evaluate_polynomial(poly, U(z*z), count); } template -inline V evaluate_even_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_even_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V) { return evaluate_polynomial(a, V(z*z)); } template -inline V evaluate_even_polynomial(const boost::array& a, const V& z) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_even_polynomial(const boost::array& a, const V& z) BOOST_MATH_NOEXCEPT(V) { return evaluate_polynomial(a, V(z*z)); } @@ -238,32 +238,32 @@ inline V evaluate_even_polynomial(const boost::array& a, const V& z) BOOST_ // Odd polynomials come next: // template -inline U evaluate_odd_polynomial(const T* poly, U z, std::size_t count) BOOST_MATH_NOEXCEPT(U) +inline BOOST_GPU_ENABLED U evaluate_odd_polynomial(const T* poly, U z, std::size_t count) BOOST_MATH_NOEXCEPT(U) { return poly[0] + z * evaluate_polynomial(poly+1, U(z*z), count-1); } template -inline V evaluate_odd_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_odd_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V) { typedef mpl::int_ tag_type; return a[0] + z * detail::evaluate_polynomial_c_imp(static_cast(a) + 1, V(z*z), static_cast(0)); } template -inline V evaluate_odd_polynomial(const boost::array& a, const V& z) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_odd_polynomial(const boost::array& a, const V& z) BOOST_MATH_NOEXCEPT(V) { typedef mpl::int_ tag_type; return a[0] + z * detail::evaluate_polynomial_c_imp(static_cast(a.data()) + 1, V(z*z), static_cast(0)); } template -V evaluate_rational(const T* num, const U* denom, const V& z_, std::size_t count) BOOST_MATH_NOEXCEPT(V); 
+BOOST_GPU_ENABLED V evaluate_rational(const T* num, const U* denom, const V& z_, std::size_t count) BOOST_MATH_NOEXCEPT(V); namespace detail{ template -inline V evaluate_rational_c_imp(const T* num, const U* denom, const V& z, const Tag*) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational_c_imp(const T* num, const U* denom, const V& z, const Tag*) BOOST_MATH_NOEXCEPT(V) { return boost::math::tools::evaluate_rational(num, denom, z, Tag::value); } @@ -278,7 +278,7 @@ inline V evaluate_rational_c_imp(const T* num, const U* denom, const V& z, const // in our Lanczos code for example. // template -V evaluate_rational(const T* num, const U* denom, const V& z_, std::size_t count) BOOST_MATH_NOEXCEPT(V) +BOOST_GPU_ENABLED V evaluate_rational(const T* num, const U* denom, const V& z_, std::size_t count) BOOST_MATH_NOEXCEPT(V) { V z(z_); V s1, s2; @@ -311,13 +311,13 @@ V evaluate_rational(const T* num, const U* denom, const V& z_, std::size_t count } template -inline V evaluate_rational(const T(&a)[N], const U(&b)[N], const V& z) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational(const T(&a)[N], const U(&b)[N], const V& z) BOOST_MATH_NOEXCEPT(V) { return detail::evaluate_rational_c_imp(a, b, z, static_cast*>(0)); } template -inline V evaluate_rational(const boost::array& a, const boost::array& b, const V& z) BOOST_MATH_NOEXCEPT(V) +inline BOOST_GPU_ENABLED V evaluate_rational(const boost::array& a, const boost::array& b, const V& z) BOOST_MATH_NOEXCEPT(V) { return detail::evaluate_rational_c_imp(a.data(), b.data(), z, static_cast*>(0)); } diff --git a/include/boost/math/tools/series.hpp b/include/boost/math/tools/series.hpp index ab01549a2d..f5c8276aa4 100644 --- a/include/boost/math/tools/series.hpp +++ b/include/boost/math/tools/series.hpp @@ -21,7 +21,7 @@ namespace boost{ namespace math{ namespace tools{ // Simple series summation come first: // template -inline typename Functor::result_type sum_series(Functor& func, const U& factor, 
boost::uintmax_t& max_terms, const V& init_value) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +inline BOOST_GPU_ENABLED typename Functor::result_type sum_series(Functor& func, const U& factor, boost::uintmax_t& max_terms, const V& init_value) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) { BOOST_MATH_STD_USING @@ -44,14 +44,14 @@ inline typename Functor::result_type sum_series(Functor& func, const U& factor, } template -inline typename Functor::result_type sum_series(Functor& func, const U& factor, boost::uintmax_t& max_terms) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +inline BOOST_GPU_ENABLED typename Functor::result_type sum_series(Functor& func, const U& factor, boost::uintmax_t& max_terms) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) { typename Functor::result_type init_value = 0; return sum_series(func, factor, max_terms, init_value); } template -inline typename Functor::result_type sum_series(Functor& func, int bits, boost::uintmax_t& max_terms, const U& init_value) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +inline BOOST_GPU_ENABLED typename Functor::result_type sum_series(Functor& func, int bits, boost::uintmax_t& max_terms, const U& init_value) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; @@ -60,7 +60,7 @@ inline typename Functor::result_type sum_series(Functor& func, int bits, boost:: } template -inline typename Functor::result_type sum_series(Functor& func, int bits) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +inline BOOST_GPU_ENABLED typename Functor::result_type sum_series(Functor& func, int 
bits) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; @@ -70,7 +70,7 @@ inline typename Functor::result_type sum_series(Functor& func, int bits) BOOST_N } template -inline typename Functor::result_type sum_series(Functor& func, int bits, boost::uintmax_t& max_terms) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +inline BOOST_GPU_ENABLED typename Functor::result_type sum_series(Functor& func, int bits, boost::uintmax_t& max_terms) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; @@ -79,7 +79,7 @@ inline typename Functor::result_type sum_series(Functor& func, int bits, boost:: } template -inline typename Functor::result_type sum_series(Functor& func, int bits, const U& init_value) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +inline BOOST_GPU_ENABLED typename Functor::result_type sum_series(Functor& func, int bits, const U& init_value) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) { BOOST_MATH_STD_USING boost::uintmax_t iters = (std::numeric_limits::max)(); @@ -99,7 +99,7 @@ inline typename Functor::result_type sum_series(Functor& func, int bits, const U // in any case the result is still much better than a naive summation. 
// template -inline typename Functor::result_type kahan_sum_series(Functor& func, int bits) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +inline BOOST_GPU_ENABLED typename Functor::result_type kahan_sum_series(Functor& func, int bits) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) { BOOST_MATH_STD_USING @@ -122,7 +122,7 @@ inline typename Functor::result_type kahan_sum_series(Functor& func, int bits) B } template -inline typename Functor::result_type kahan_sum_series(Functor& func, int bits, boost::uintmax_t& max_terms) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +inline BOOST_GPU_ENABLED typename Functor::result_type kahan_sum_series(Functor& func, int bits, boost::uintmax_t& max_terms) BOOST_NOEXCEPT_IF(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) { BOOST_MATH_STD_USING diff --git a/include/boost/math/tools/tuple.hpp b/include/boost/math/tools/tuple.hpp index 81de59fa3a..5daab83972 100644 --- a/include/boost/math/tools/tuple.hpp +++ b/include/boost/math/tools/tuple.hpp @@ -8,6 +8,29 @@ # include # include +#ifdef __CUDA_ARCH__ +#include + +namespace boost { + namespace math { + + using thrust::pair; + + } +} + +#else + +#include + +namespace boost { + namespace math { + using std::pair; +} } + +#endif + + #if !defined(BOOST_NO_CXX11_HDR_TUPLE) && !BOOST_WORKAROUND(BOOST_GCC_VERSION, < 40500) #include diff --git a/include/boost/math/tools/workaround.hpp b/include/boost/math/tools/workaround.hpp index 29ce8b1c04..e6eb82eba5 100644 --- a/include/boost/math/tools/workaround.hpp +++ b/include/boost/math/tools/workaround.hpp @@ -19,14 +19,14 @@ namespace boost{ namespace math{ namespace tools{ // std::fmod(1185.0L, 1.5L); // template -inline T fmod_workaround(T a, T b) BOOST_MATH_NOEXCEPT(T) +inline BOOST_GPU_ENABLED T fmod_workaround(T a, T b) BOOST_MATH_NOEXCEPT(T) { 
BOOST_MATH_STD_USING return fmod(a, b); } #if (defined(macintosh) || defined(__APPLE__) || defined(__APPLE_CC__)) && ((LDBL_MANT_DIG == 106) || (__LDBL_MANT_DIG__ == 106)) template <> -inline long double fmod_workaround(long double a, long double b) BOOST_NOEXCEPT +inline BOOST_GPU_ENABLED long double fmod_workaround(long double a, long double b) BOOST_NOEXCEPT { return ::fmodl(a, b); } diff --git a/test/cuda/Jamfile.v2 b/test/cuda/Jamfile.v2 new file mode 100644 index 0000000000..35233df73e --- /dev/null +++ b/test/cuda/Jamfile.v2 @@ -0,0 +1,19 @@ + +import testing ; +import cu ; +import modules ; + +cuda = [ modules.peek : CUDA_PATH ] ; + +searched-lib cuda : : cuda $(cuda)/lib/x64 ; +searched-lib cudart : : cudart $(cuda)/lib/x64 ; + +exe verify_cuda : cuda_check.cu cuda cudart ../../../chrono/build//boost_chrono ../../../system/build//boost_system : BOOST_ALL_NO_LIB release 64 msvc:static ; +explicit verify_cuda ; + +for local source in [ glob test*.cu ] +{ + run $(source) cuda cudart ../../../chrono/build//boost_chrono ../../../system/build//boost_system : : : release BOOST_ALL_NO_LIB 64 msvc:static [ check-target-builds .//verify_cuda "CUDA compiler support" : : no ] ; +} + +run misc/test_naive_monte_carlo.cu misc/test_naive_monte_carlo_host.cpp cuda cudart ../../../chrono/build//boost_chrono ../../../system/build//boost_system : : : release BOOST_ALL_NO_LIB 64 msvc:static [ check-target-builds .//verify_cuda "CUDA compiler support" : : no ] ; diff --git a/test/cuda/cu.jam b/test/cuda/cu.jam new file mode 100644 index 0000000000..f2efd1f82c --- /dev/null +++ b/test/cuda/cu.jam @@ -0,0 +1,15 @@ + + +import os ; +import type ; +type.register CU : cu ; +import generators ; +generators.register-standard cu.inline-file : CU : OBJ ; + +#rule cuda { } + +actions inline-file +{ + nvcc -c -O3 -DBOOST_PP_VARIADICS=0 -expt-extended-lambda -I../../../.. 
-DBOOST_ALL_NO_LIB -o $(<) $(>) +} + diff --git a/test/cuda/cuda_check.cu b/test/cuda/cuda_check.cu new file mode 100644 index 0000000000..6059c5c686 --- /dev/null +++ b/test/cuda/cuda_check.cu @@ -0,0 +1,107 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in1[i] * in1[i]; + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(-10000, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), 
output_vector.get(), numElements); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(input_vector1[i] * input_vector1[i]); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/cuda_managed_ptr.hpp b/test/cuda/cuda_managed_ptr.hpp new file mode 100644 index 0000000000..6ec7f3f7f6 --- /dev/null +++ b/test/cuda/cuda_managed_ptr.hpp @@ -0,0 +1,138 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_CUDA_MANAGED_PTR_HPP +#define BOOST_MATH_CUDA_MANAGED_PTR_HPP + +#ifdef _MSC_VER +#pragma once +#endif + +class managed_holder_base +{ +protected: + static int count; + managed_holder_base() { ++count; } + ~managed_holder_base() + { + if(0 == --count) + cudaDeviceSynchronize(); + } +}; + +int managed_holder_base::count = 0; + +// +// Reset the device and exit: +// cudaDeviceReset causes the driver to clean up all state. While +// not mandatory in normal operation, it is good practice. It is also +// needed to ensure correct operation when the application is being +// profiled. Calling cudaDeviceReset causes all profile data to be +// flushed before the application exits. +// +// We have a global instance of this class, plus instances for each +// managed pointer. Last one out the door switches the lights off. +// +class cudaResetter +{ + static int count; +public: + cudaResetter() { ++count; } + ~cudaResetter() + { + if(--count == 0) + { + cudaError_t err = cudaDeviceReset(); + if(err != cudaSuccess) + { + std::cerr << "Failed to deinitialize the device! error=" << cudaGetErrorString(err) << std::endl; + } + } + } +}; + +int cudaResetter::count = 0; + +cudaResetter global_resetter; + +template +class cuda_managed_ptr +{ + T* data; + static const cudaResetter resetter; + cuda_managed_ptr(const cuda_managed_ptr&) = delete; + cuda_managed_ptr& operator=(cuda_managed_ptr const&) = delete; + void free() + { + if(data) + { + cudaDeviceSynchronize(); + cudaError_t err = cudaFree(data); + if(err != cudaSuccess) + { + std::cerr << "Failed to deinitialize the device! 
error=" << cudaGetErrorString(err) << std::endl; + } + } + } +public: + cuda_managed_ptr() : data(0) {} + cuda_managed_ptr(std::size_t n) + { + cudaError_t err = cudaSuccess; + void *ptr; + err = cudaMallocManaged(&ptr, n * sizeof(T)); + if(err != cudaSuccess) + throw std::runtime_error(cudaGetErrorString(err)); + cudaDeviceSynchronize(); + data = static_cast(ptr); + } + cuda_managed_ptr(cuda_managed_ptr&& o) + { + data = o.data; + o.data = 0; + } + cuda_managed_ptr& operator=(cuda_managed_ptr&& o) + { + free(); + data = o.data; + o.data = 0; + return *this; + } + ~cuda_managed_ptr() + { + free(); + } + + class managed_holder : managed_holder_base + { + T* pdata; + public: + managed_holder(T* p) : managed_holder_base(), pdata(p) {} + managed_holder(const managed_holder& o) : managed_holder_base(), pdata(o.pdata) {} + operator T* () { return pdata; } + T& operator[] (std::size_t n) { return pdata[n]; } + }; + class const_managed_holder : managed_holder_base + { + const T* pdata; + public: + const_managed_holder(T* p) : managed_holder_base(), pdata(p) {} + const_managed_holder(const managed_holder& o) : managed_holder_base(), pdata(o.pdata) {} + operator const T* () { return pdata; } + const T& operator[] (std::size_t n) { return pdata[n]; } + }; + + managed_holder get() { return managed_holder(data); } + const_managed_holder get()const { return data; } + T& operator[](std::size_t n) { return data[n]; } + const T& operator[](std::size_t n)const { return data[n]; } +}; + +template +cudaResetter const cuda_managed_ptr::resetter; + +#endif + + diff --git a/test/cuda/misc/test_naive_monte_carlo.cu b/test/cuda/misc/test_naive_monte_carlo.cu new file mode 100644 index 0000000000..9ebaf93c96 --- /dev/null +++ b/test/cuda/misc/test_naive_monte_carlo.cu @@ -0,0 +1,198 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#ifdef __CUDACC__ +#define BOOST_PP_VARIADICS 0 +#endif + +#include +#include +#include +#include "../stopwatch.hpp" +#include +#include +#include +#include +#include + + +typedef double float_type; + + template + struct pi_calculator + { + __host__ __device__ Real operator()(const Real* abscissa)const + { + return abscissa[0] * abscissa[0] + abscissa[1] * abscissa[1] <= 1 ? 1 : 0; + } + }; + + template + struct hypersphere + { + __host__ __device__ Real operator()(const Real* abscissa)const + { + Real location = 0; + for (unsigned i = 0; i < N; ++i) + location += abscissa[i] * abscissa[i]; + return location <= 1 ? 1 : 0; + } + }; + + void test_host_pi(double); + void test_host_hypersphere_10(); + +/** + * Host main routine + */ +int main(void) +{ + + using boost::math::quadrature::cuda_naive_monte_carlo; + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + + boost::uintmax_t max_threads = deviceProp.maxThreadsPerMultiProcessor * deviceProp.multiProcessorCount; + + std::cout << "maxThreadsPerBlock = " << deviceProp.maxThreadsPerBlock << std::endl; + std::cout << "maxThreadsPerMultiProcessor = " << deviceProp.maxThreadsPerMultiProcessor << std::endl; + std::cout << "multiProcessorCount = " << deviceProp.multiProcessorCount << std::endl << std::endl; + std::cout << "Total max threads = " << max_threads << std::endl; + + std::cout << "Testing Pi calculation for circle formula.\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"; + std::cout << std::right << std::setw(15); + std::cout << "Threads" << std::right << std::setw(15) << "Init #/thread" + << std::right << std::setw(15) << "Total Points" + << std::right << std::setw(15) << "Time" + << std::right << std::setw(15) << "Error Goal" + << std::right << std::setw(15) << "Variance" + << std::right << std::setw(15) << "Error Est." 
+ << std::right << std::setw(15) << "Error Actual" << std::endl; + + // + // Do something to initialize the CUDA device: + // + do { + std::vector > bounds = { { -1, 1 },{ -1, 1 } }; + cuda_naive_monte_carlo, thrust::random::taus88> init(pi_calculator(), bounds); + init.integrate(0.01); + } while (0); + + + for (double error_goal = 1e-3; error_goal > 1e-5; error_goal /= 2) + { + for (boost::uintmax_t calls_per_thread = 128; calls_per_thread < 2048; calls_per_thread *= 2) + { + try { + std::vector > bounds = { {-1, 1}, {-1, 1} }; + + watch w; + cuda_naive_monte_carlo, thrust::random::taus88> integrator(pi_calculator(), bounds); + double val = integrator.integrate(error_goal, calls_per_thread); + double elapsed = w.elapsed(); + double err = fabs(val - boost::math::constants::pi()); + std::cout << std::right << std::setw(15) << std::fixed << max_threads + << std::right << std::setw(15) << calls_per_thread + << std::right << std::setw(15) << integrator.calls() + << std::right << std::setw(15) << std::fixed << elapsed + << std::right << std::setw(15) << std::scientific << error_goal + << std::right << std::setw(15) << std::scientific << integrator.variance() + << std::right << std::setw(15) << std::scientific << integrator.current_error_estimate() + << std::right << std::setw(15) << std::scientific << err << std::endl; + } + catch (const std::exception& e) + { + std::cout << "Found exception: " << e.what() << std::endl; + } + } + } + + for (double error_goal = 1e-3; error_goal > 1e-5; error_goal /= 2) + { + test_host_pi(error_goal); + } + + std::cout << "Testing Hypersphere volume.\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"; + std::cout << std::right << std::setw(15); + std::cout << "Threads" << std::right << std::setw(15) << "Init #/thread" + << std::right << std::setw(15) << "Total Points" + << std::right << std::setw(15) << "Time" + << std::right << std::setw(15) << "Error Goal" + << std::right << std::setw(15) << "Variance" + << std::right << std::setw(15) << "Error 
Est." + << std::right << std::setw(15) << "Error Actual" << std::endl; + + double hypersphere10 = std::pow(boost::math::constants::pi(), 5) / boost::math::tgamma(6.0); + + // initialized CUDA device code: + { + std::vector > bounds; + std::pair point = { -1.0, 1.0 }; + for (unsigned i = 0; i < 10; ++i) + bounds.push_back(point); + + cuda_naive_monte_carlo, thrust::random::taus88> integrator(hypersphere(), bounds); + double val = integrator.integrate(1e-2); + } + + for (boost::uintmax_t calls_per_thread = 64; calls_per_thread < 4086; calls_per_thread *= 2) + { + try { + double error_goal = 0.02; + + std::vector > bounds; + std::pair point = { -1.0, 1.0 }; + for (unsigned i = 0; i < 10; ++i) + bounds.push_back(point); + + watch w; + cuda_naive_monte_carlo, thrust::random::taus88> integrator(hypersphere(), bounds); + double val = integrator.integrate(error_goal, calls_per_thread); + double elapsed = w.elapsed(); + double err = fabs(val - hypersphere10); + std::cout << std::right << std::setw(15) << std::fixed << max_threads + << std::right << std::setw(15) << calls_per_thread + << std::right << std::setw(15) << integrator.calls() + << std::right << std::setw(15) << std::fixed << elapsed + << std::right << std::setw(15) << std::scientific << error_goal + << std::right << std::setw(15) << std::scientific << integrator.variance() + << std::right << std::setw(15) << std::scientific << integrator.current_error_estimate() + << std::right << std::setw(15) << std::scientific << err << std::endl; + } + catch (const std::exception& e) + { + std::cout << "Found exception: " << e.what() << std::endl; + } + } + + test_host_hypersphere_10(); + + /* Example code from docs */ + + { + + // Define a function to integrate: + auto g = [] __device__ (const double* x) + { + constexpr const double pi = boost::math::constants::pi(); + constexpr const double A = 1.0 / (pi * pi * pi); + return A / (1.0 - cos(x[0])*cos(x[1])*cos(x[2])); + }; + std::vector> bounds{ { 0, 
boost::math::constants::pi() },{ 0, boost::math::constants::pi() },{ 0, boost::math::constants::pi() } }; + double error_goal = 0.001; + cuda_naive_monte_carlo mc(g, bounds); + + double result = mc.integrate(error_goal); + + std::cout << "Integration result is: " << result << std::endl; + + + } + + return 0; +} + diff --git a/test/cuda/misc/test_naive_monte_carlo_host.cpp b/test/cuda/misc/test_naive_monte_carlo_host.cpp new file mode 100644 index 0000000000..5bd819a19e --- /dev/null +++ b/test/cuda/misc/test_naive_monte_carlo_host.cpp @@ -0,0 +1,98 @@ + + +#include +#include +#include +#include +#include "../stopwatch.hpp" +#include +#include +#include +#include + + + +void test_host_pi(double error_goal) +{ + using boost::math::quadrature::naive_monte_carlo; + + + watch w; + auto g = [](std::vector const & x)->double + { + double r = x[0] * x[0] + x[1] * x[1]; + if (r <= 1) + { + return 1; + } + return 0; + }; + + std::cout << "Regular host code:\n"; + + std::vector> bounds2{ { -1.0, 1.0 },{ -1., 1. 
} }; + + for (unsigned threads = 1; threads < 9; ++threads) + { + w.reset(); + naive_monte_carlo mc(g, bounds2, error_goal, + /*singular =*/ false,/* threads = */ threads, /* seed = */ 128402); + auto task = mc.integrate(); + double val = task.get(); + double elapsed = w.elapsed(); + boost::uintmax_t points = mc.calls(); + double err = fabs(val - boost::math::constants::pi()); + std::cout << std::right << std::setw(15) << threads + << std::right << std::setw(15) << "-" + << std::right << std::setw(15) << points + << std::right << std::setw(15) << std::fixed << elapsed + << std::right << std::setw(15) << std::scientific << error_goal + << std::right << std::setw(15) << std::scientific << mc.variance() + << std::right << std::setw(15) << std::scientific << mc.current_error_estimate() + << std::right << std::setw(15) << std::scientific << err << std::endl; + } +} + +void test_host_hypersphere_10() +{ + using boost::math::quadrature::naive_monte_carlo; + + + watch w; + auto g = [](std::vector const & x)->double + { + double location = 0; + for (unsigned i = 0; i < 10; ++i) + location += x[i] * x[i]; + return location <= 1 ? 
1 : 0; + }; + + std::cout << "Regular host code:\n"; + + double hypersphere10 = std::pow(boost::math::constants::pi(), 5) / boost::math::tgamma(6.0); + + std::vector> bounds2; + std::pair point = { -1.0, 1.0 }; + for (unsigned i = 0; i < 10; ++i) + bounds2.push_back(point); + double error_goal = 0.02; + for (unsigned threads = 1; threads < 9; ++threads) + { + w.reset(); + naive_monte_carlo mc(g, bounds2, error_goal, + /*singular =*/ false,/* threads = */ threads, /* seed = */ 128402); + auto task = mc.integrate(); + double val = task.get(); + double elapsed = w.elapsed(); + boost::uintmax_t points = mc.calls(); + double err = fabs(val - hypersphere10); + std::cout << std::right << std::setw(15) << threads + << std::right << std::setw(15) << "-" + << std::right << std::setw(15) << points + << std::right << std::setw(15) << std::fixed << elapsed + << std::right << std::setw(15) << std::scientific << error_goal + << std::right << std::setw(15) << std::scientific << mc.variance() + << std::right << std::setw(15) << std::scientific << mc.current_error_estimate() + << std::right << std::setw(15) << std::scientific << err << std::endl; + } +} diff --git a/test/cuda/misc/test_naive_monte_carlo_output.txt b/test/cuda/misc/test_naive_monte_carlo_output.txt new file mode 100644 index 0000000000..71d7b62737 --- /dev/null +++ b/test/cuda/misc/test_naive_monte_carlo_output.txt @@ -0,0 +1,117 @@ +maxThreadsPerBlock = 1024 +maxThreadsPerMultiProcessor = 2048 +multiProcessorCount = 5 + +Total max threads = 10240 +Testing Pi calculation for circle formula. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Threads Init #/thread Total Points Time Error Goal Variance Error Est. 
Error Actual + 10240 128 3481600 0.008367 1.000000e-03 2.698975e+00 8.804608e-04 9.676536e-04 + 10240 256 3031040 0.008578 1.000000e-03 2.698542e+00 9.435583e-04 7.776198e-04 + 10240 512 5242880 0.011217 1.000000e-03 2.698333e+00 7.174024e-04 6.861289e-04 + 10240 1024 10485760 0.016828 1.000000e-03 2.696115e+00 5.070715e-04 2.854745e-04 + 10240 128 12154880 0.019123 5.000000e-04 2.697501e+00 4.710917e-04 3.216579e-04 + 10240 256 22097920 0.030329 5.000000e-04 2.696124e+00 3.492965e-04 2.815138e-04 + 10240 512 12226560 0.018789 5.000000e-04 2.695405e+00 4.695262e-04 5.961550e-04 + 10240 1024 21821440 0.030160 5.000000e-04 2.696328e+00 3.515157e-04 1.917566e-04 + 10240 128 47482880 0.056661 2.500000e-04 2.696911e+00 2.383224e-04 6.349613e-05 + 10240 256 45527040 0.052038 2.500000e-04 2.697649e+00 2.434210e-04 3.866362e-04 + 10240 512 46909440 0.051340 2.500000e-04 2.697758e+00 2.398123e-04 4.345413e-04 + 10240 1024 45086720 0.049912 2.500000e-04 2.696520e+00 2.445555e-04 1.079625e-04 + 10240 128 194457600 0.180079 1.250000e-04 2.696297e+00 1.177528e-04 2.055790e-04 + 10240 256 179671040 0.148807 1.250000e-04 2.695943e+00 1.224944e-04 3.606851e-04 + 10240 512 185292800 0.142422 1.250000e-04 2.696969e+00 1.206448e-04 8.891464e-05 + 10240 1024 178534400 0.128817 1.250000e-04 2.696871e+00 1.229049e-04 4.579203e-05 + 10240 128 812759040 0.530420 6.250000e-05 2.696687e+00 5.760158e-05 3.456719e-05 + 10240 256 765757440 0.498838 6.250000e-05 2.696707e+00 5.934324e-05 2.588295e-05 + 10240 512 712366080 0.464361 6.250000e-05 2.696697e+00 6.152682e-05 3.040628e-05 + 10240 1024 723793920 0.471714 6.250000e-05 2.696916e+00 6.104165e-05 6.568415e-05 + 10240 128 2899230720 1.873548 3.125000e-05 2.696716e+00 3.049834e-05 2.181716e-05 + 10240 256 2908538880 1.879682 3.125000e-05 2.696710e+00 3.044947e-05 2.464258e-05 + 10240 512 2887147520 1.867332 3.125000e-05 2.696759e+00 3.056234e-05 3.253501e-06 + 10240 1024 2905436160 1.877813 3.125000e-05 2.696726e+00 3.046581e-05 1.766629e-05 
+ 10240 128 11596021760 7.482193 1.562500e-05 2.696714e+00 1.524976e-05 2.306342e-05 + 10240 256 11597219840 7.485470 1.562500e-05 2.696743e+00 1.524905e-05 1.027976e-05 + 10240 512 11603496960 7.489412 1.562500e-05 2.696769e+00 1.524500e-05 1.019131e-06 + 10240 1024 11591219200 7.483967 1.562500e-05 2.696742e+00 1.525299e-05 1.074012e-05 +Regular host code: + 1 - 4550657 0.101999 1.000000e-03 2.697200e+00 7.698737e-04 1.895551e-04 + 2 - 9011202 0.101099 1.000000e-03 2.697681e+00 5.471469e-04 4.006128e-04 + 3 - 13193219 0.101715 1.000000e-03 2.697069e+00 4.521376e-04 1.324838e-04 + 4 - 15540228 0.101911 1.000000e-03 2.696240e+00 4.165341e-04 2.304908e-04 + 5 - 17301509 0.101999 1.000000e-03 2.695919e+00 3.947402e-04 3.705127e-04 + 6 - 17262598 0.101882 1.000000e-03 2.696000e+00 3.951908e-04 3.351142e-04 + 7 - 15417351 0.101948 1.000000e-03 2.696114e+00 4.181810e-04 2.850918e-04 + 8 - 22349832 0.128935 1.000000e-03 2.696685e+00 3.473585e-04 3.525663e-05 +Regular host code: + 1 - 13709313 0.302718 5.000000e-04 2.695696e+00 4.434326e-04 4.688049e-04 + 2 - 18317314 0.201833 5.000000e-04 2.696751e+00 3.836980e-04 6.600537e-06 + 3 - 13318147 0.101908 5.000000e-04 2.697159e+00 4.500196e-04 1.721542e-04 + 4 - 17143812 0.101902 5.000000e-04 2.696372e+00 3.965849e-04 1.727834e-04 + 5 - 17346565 0.101924 5.000000e-04 2.696311e+00 3.942559e-04 1.989345e-04 + 6 - 16046086 0.101931 5.000000e-04 2.696458e+00 4.099324e-04 1.344941e-04 + 7 - 17283079 0.101943 5.000000e-04 2.696471e+00 3.949911e-04 1.285640e-04 + 8 - 21749768 0.128923 5.000000e-04 2.696677e+00 3.521171e-04 3.865025e-05 +Regular host code: + 1 - 46278657 1.009327 2.500000e-04 2.696945e+00 2.414047e-04 7.806729e-05 + 2 - 45809666 0.504654 2.500000e-04 2.696625e+00 2.426228e-04 6.203562e-05 + 3 - 52557827 0.403961 2.500000e-04 2.697215e+00 2.265370e-04 1.967964e-04 + 4 - 50624516 0.605888 2.500000e-04 2.696779e+00 2.308034e-04 5.759208e-06 + 5 - 47114245 0.303742 2.500000e-04 2.696477e+00 2.392336e-04 1.267988e-04 + 6 
- 46374918 0.303819 2.500000e-04 2.696350e+00 2.411274e-04 1.822170e-04 + 7 - 51236871 0.312092 2.500000e-04 2.696683e+00 2.294160e-04 3.619412e-05 + 8 - 54386696 0.331619 2.500000e-04 2.696812e+00 2.226789e-04 2.016314e-05 +Regular host code: + 1 - 175192065 3.836674 1.250000e-04 2.696896e+00 1.240723e-04 5.692251e-05 + 2 - 175951874 2.020645 1.250000e-04 2.696651e+00 1.237985e-04 5.033908e-05 + 3 - 181583875 1.414251 1.250000e-04 2.696676e+00 1.218641e-04 3.934430e-05 + 4 - 183513092 1.212304 1.250000e-04 2.696707e+00 1.212225e-04 2.579835e-05 + 5 - 175282181 1.212331 1.250000e-04 2.696732e+00 1.240367e-04 1.478682e-05 + 6 - 173148166 1.313198 1.250000e-04 2.696545e+00 1.247943e-04 9.671319e-05 + 7 - 175978503 1.111325 1.250000e-04 2.696631e+00 1.237887e-04 5.943786e-05 + 8 - 173408264 1.080426 1.250000e-04 2.696845e+00 1.247077e-04 3.453269e-05 +Regular host code: + 1 - 693923841 16.151669 6.250000e-05 2.696759e+00 6.233976e-05 3.232291e-06 + 2 - 694083586 7.671572 6.250000e-05 2.696786e+00 6.233290e-05 8.717617e-06 + 3 - 699369475 5.249936 6.250000e-05 2.696698e+00 6.209588e-05 3.006593e-05 + 4 - 699097092 4.240581 6.250000e-05 2.696848e+00 6.210971e-05 3.592687e-05 + 5 - 698124293 4.442437 6.250000e-05 2.696688e+00 6.215113e-05 3.411114e-05 + 6 - 691200006 4.341502 6.250000e-05 2.696544e+00 6.245999e-05 9.729599e-05 + 7 - 700665863 4.251577 6.250000e-05 2.696596e+00 6.203724e-05 7.446656e-05 + 8 - 691546120 4.298594 6.250000e-05 2.696597e+00 6.244496e-05 7.427674e-05 +Regular host code: + 1 - 2762876929 59.859699 3.125000e-05 2.696780e+00 3.124223e-05 6.067197e-06 + 2 - 2763421698 30.383588 3.125000e-05 2.696766e+00 3.123906e-05 2.528722e-07 + 3 - 2772873219 20.996881 3.125000e-05 2.696811e+00 3.118604e-05 1.943188e-05 + 4 - 2762248196 17.363063 3.125000e-05 2.696818e+00 3.124600e-05 2.281328e-05 + 5 - 2764449797 17.665886 3.125000e-05 2.696765e+00 3.123325e-05 3.973764e-07 + 6 - 2768607238 17.766842 3.125000e-05 2.696751e+00 3.120971e-05 6.553629e-06 + 7 - 
2766036999 17.587942 3.125000e-05 2.696786e+00 3.122441e-05 8.847176e-06 + 8 - 2766690312 17.630970 3.125000e-05 2.696802e+00 3.122082e-05 1.577008e-05 +Regular host code: + 1 - 11046782977 255.202993 1.562500e-05 2.696767e+00 1.562442e-05 4.741963e-07 + 2 - 11051421698 122.614502 1.562500e-05 2.696776e+00 1.562116e-05 4.096320e-06 + 3 - 11048861699 86.396803 1.562500e-05 2.696747e+00 1.562289e-05 8.501137e-06 + 4 - 11060627460 77.351707 1.562500e-05 2.696739e+00 1.561455e-05 1.196637e-05 + 5 - 11048994821 175.726520 1.562500e-05 2.696776e+00 1.562288e-05 4.087902e-06 + 6 - 11050817542 76.486873 1.562500e-05 2.696752e+00 1.562152e-05 6.182881e-06 + 7 - 11058628615 74.262706 1.562500e-05 2.696773e+00 1.561606e-05 2.913170e-06 + 8 - 11047886856 83.430636 1.562500e-05 2.696755e+00 1.562360e-05 5.093417e-06 +Testing Hypersphere volume. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Threads Init #/thread Total Points Time Error Goal Variance Error Est. Error Actual + 10240 64 7311360 0.053767 2.000000e-02 2.574587e+03 1.876526e-02 2.971586e-02 + 10240 128 8253440 0.060574 2.000000e-02 2.598377e+03 1.774327e-02 6.367514e-03 + 10240 256 9318400 0.065937 2.000000e-02 2.581856e+03 1.664544e-02 2.258162e-02 + 10240 512 12103680 0.076164 2.000000e-02 2.619352e+03 1.471086e-02 1.421836e-02 + 10240 1024 10485760 0.065995 2.000000e-02 2.608479e+03 1.577225e-02 3.546898e-03 + 10240 2048 20971520 0.115725 2.000000e-02 2.609822e+03 1.115554e-02 4.865257e-03 +Regular host code: + 1 - 7063553 0.504751 2.000000e-02 2.616529e+03 1.924647e-02 1.144766e-02 + 2 - 8331266 0.301304 2.000000e-02 2.603470e+03 1.767750e-02 1.369175e-03 + 3 - 8808451 0.302716 2.000000e-02 2.596392e+03 1.716862e-02 8.314173e-03 + 4 - 9146372 0.202616 2.000000e-02 2.630406e+03 1.695848e-02 2.506994e-02 + 5 - 7051269 0.202599 2.000000e-02 2.593405e+03 1.917792e-02 1.124629e-02 + 6 - 7208966 0.202971 2.000000e-02 2.610192e+03 1.902829e-02 5.231561e-03 + 7 - 9146375 0.223786 2.000000e-02 2.604055e+03 1.687332e-02 7.929502e-04 
+ 8 - 10758152 0.258991 2.000000e-02 2.602338e+03 1.555296e-02 2.478898e-03 diff --git a/test/cuda/stopwatch.hpp b/test/cuda/stopwatch.hpp new file mode 100644 index 0000000000..028fe25fa4 --- /dev/null +++ b/test/cuda/stopwatch.hpp @@ -0,0 +1,41 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_CUDA_STOPWATCH_HPP +#define BOOST_MATH_CUDA_STOPWATCH_HPP + +#ifdef _MSC_VER +#pragma once +#endif + +#include + +template +struct stopwatch +{ + typedef typename Clock::duration duration; + stopwatch() + { + m_start = Clock::now(); + } + double elapsed() + { + duration t = Clock::now() - m_start; + return boost::chrono::duration_cast>(t).count(); + } + void reset() + { + m_start = Clock::now(); + } + +private: + typename Clock::time_point m_start; +}; + +typedef stopwatch watch; + +#endif + + diff --git a/test/cuda/test_arcsine_cdf_double.cu b/test/cuda/test_arcsine_cdf_double.cu new file mode 100644 index 0000000000..7b93a830cc --- /dev/null +++ b/test/cuda/test_arcsine_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_arcsine_cdf_float.cu b/test/cuda/test_arcsine_cdf_float.cu new file mode 100644 index 0000000000..d42084cd0a --- /dev/null +++ b/test/cuda/test_arcsine_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_arcsine_pdf_double.cu b/test/cuda/test_arcsine_pdf_double.cu new file mode 100644 index 0000000000..3d61ce6f07 --- /dev/null +++ b/test/cuda/test_arcsine_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_arcsine_pdf_float.cu b/test/cuda/test_arcsine_pdf_float.cu new file mode 100644 index 0000000000..2dd5d6dbdf --- /dev/null +++ b/test/cuda/test_arcsine_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_arcsine_quan_double.cu b/test/cuda/test_arcsine_quan_double.cu new file mode 100644 index 0000000000..46b3ab4c9a --- /dev/null +++ b/test/cuda/test_arcsine_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_arcsine_quan_float.cu b/test/cuda/test_arcsine_quan_float.cu new file mode 100644 index 0000000000..39ff89041c --- /dev/null +++ b/test/cuda/test_arcsine_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_beta_double.cu b/test/cuda/test_beta_double.cu new file mode 100644 index 0000000000..2114a75698 --- /dev/null +++ b/test/cuda/test_beta_double.cu @@ -0,0 +1,130 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type * in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::beta(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../beta_med_data.ipp" +#include "../beta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2; + + for(unsigned i = 0; i < beta_med_data.size(); ++i) + { + v1.push_back(beta_med_data[i][0]); + v2.push_back(beta_med_data[i][1]); + } + for(unsigned i = 0; i < beta_small_data.size(); ++i) + { + v1.push_back(beta_small_data[i][0]); + v2.push_back(beta_small_data[i][1]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int 
blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::beta(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_beta_float.cu b/test/cuda/test_beta_float.cu new file mode 100644 index 0000000000..326b32a68a --- /dev/null +++ b/test/cuda/test_beta_float.cu @@ -0,0 +1,130 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type * in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::beta(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../beta_med_data.ipp" +#include "../beta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2; + + for(unsigned i = 0; i < beta_med_data.size(); ++i) + { + v1.push_back(beta_med_data[i][0]); + v2.push_back(beta_med_data[i][1]); + } + for(unsigned i = 0; i < beta_small_data.size(); ++i) + { + v1.push_back(beta_small_data[i][0]); + v2.push_back(beta_small_data[i][1]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int 
blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::beta(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_cauchy_cdf_double.cu b/test/cuda/test_cauchy_cdf_double.cu new file mode 100644 index 0000000000..4b346f0809 --- /dev/null +++ b/test/cuda/test_cauchy_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(-10000, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << 
")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::cauchy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_cauchy_cdf_float.cu b/test/cuda/test_cauchy_cdf_float.cu new file mode 100644 index 0000000000..d839097dec --- /dev/null +++ b/test/cuda/test_cauchy_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(-10000, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << 
")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::cauchy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_cauchy_pdf_double.cu b/test/cuda/test_cauchy_pdf_double.cu new file mode 100644 index 0000000000..a6e1c7a400 --- /dev/null +++ b/test/cuda/test_cauchy_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(-10000, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << 
")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::cauchy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_cauchy_pdf_float.cu b/test/cuda/test_cauchy_pdf_float.cu new file mode 100644 index 0000000000..d3555f375d --- /dev/null +++ b/test/cuda/test_cauchy_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(-10000, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << 
")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::cauchy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_cauchy_quan_double.cu b/test/cuda/test_cauchy_quan_double.cu new file mode 100644 index 0000000000..e0b0080908 --- /dev/null +++ b/test/cuda/test_cauchy_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::cauchy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_cauchy_quan_float.cu b/test/cuda/test_cauchy_quan_float.cu new file mode 100644 index 0000000000..3d4ecb5b37 --- /dev/null +++ b/test/cuda/test_cauchy_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::cauchy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_cbrt_double.cu b/test/cuda/test_cbrt_double.cu new file mode 100644 index 0000000000..0e7034e897 --- /dev/null +++ b/test/cuda/test_cbrt_double.cu @@ -0,0 +1,99 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::cbrt(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::cbrt(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_cbrt_float.cu b/test/cuda/test_cbrt_float.cu new file mode 100644 index 0000000000..c1bd6208b1 --- /dev/null +++ b/test/cuda/test_cbrt_float.cu @@ -0,0 +1,98 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::cbrt(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + 
cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::cbrt(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED in " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_changesign_double.cu b/test/cuda/test_changesign_double.cu new file mode 100644 index 0000000000..b95a867a59 --- /dev/null +++ b/test/cuda/test_changesign_double.cu @@ -0,0 +1,112 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::changesign(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + switch(i % 55) + { + case 1: + h_A[i] = 0; + break; + case 2: + h_A[i] = std::numeric_limits::infinity(); + break; + case 3: + h_A[i] = -std::numeric_limits::infinity(); + break; + } + if(i % 1) + h_A[i] = -h_A[i]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::changesign(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (h_C[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_chi_sq_cdf_double.cu b/test/cuda/test_chi_sq_cdf_double.cu new file mode 100644 index 0000000000..9e52388f73 --- /dev/null +++ b/test/cuda/test_chi_sq_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::chi_squared_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector 
A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::chi_squared_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_chi_sq_cdf_float.cu b/test/cuda/test_chi_sq_cdf_float.cu new file mode 100644 index 0000000000..44bc07746d --- /dev/null +++ b/test/cuda/test_chi_sq_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::chi_squared_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + 
boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::chi_squared_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_chi_sq_pdf_double.cu b/test/cuda/test_chi_sq_pdf_double.cu new file mode 100644 index 0000000000..f114f6aba3 --- /dev/null +++ b/test/cuda/test_chi_sq_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. 
+// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::chi_squared_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != 
cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::chi_squared_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_chi_sq_pdf_float.cu b/test/cuda/test_chi_sq_pdf_float.cu new file mode 100644 index 0000000000..6c83af21a5 --- /dev/null +++ b/test/cuda/test_chi_sq_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::chi_squared_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::chi_squared_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_copysign_double.cu b/test/cuda/test_copysign_double.cu new file mode 100644 index 0000000000..0ddcdfa395 --- /dev/null +++ b/test/cuda/test_copysign_double.cu @@ -0,0 +1,112 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::copysign(in[i], float_type(-1.0)); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + switch(i % 55) + { + case 1: + h_A[i] = 0; + break; + case 2: + h_A[i] = std::numeric_limits::infinity(); + break; + case 3: + h_A[i] = -std::numeric_limits::infinity(); + break; + } + if(i % 1) + h_A[i] = -h_A[i]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::copysign(h_A[i], float_type(-1.0))); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (h_C[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_ellint_1E_double.cu b/test/cuda/test_ellint_1E_double.cu new file mode 100644 index 0000000000..4d4116aea7 --- /dev/null +++ b/test/cuda/test_ellint_1E_double.cu @@ -0,0 +1,117 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_1(in[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ellint_k_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v; + + for(unsigned i = 0; i < ellint_k_data.size(); ++i) + v.push_back(ellint_k_data[i][0]); + cudaError_t err = cudaSuccess; + + // 
Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v.size(); + input_vector[i] = v[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_1(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_ellint_1_double.cu b/test/cuda/test_ellint_1_double.cu new file mode 100644 index 0000000000..406e8e1040 --- /dev/null +++ b/test/cuda/test_ellint_1_double.cu @@ -0,0 +1,117 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type* in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_1(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ellint_f_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + std::vector v1, v2; + + for(unsigned i = 0; i < ellint_f_data.size(); ++i) + { + v1.push_back(ellint_f_data[i][1]); + v2.push_back(ellint_f_data[i][0]); + } + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input 
vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_1(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_ellint_2_double.cu b/test/cuda/test_ellint_2_double.cu new file mode 100644 index 0000000000..b7e4d42919 --- /dev/null +++ b/test/cuda/test_ellint_2_double.cu @@ -0,0 +1,117 @@ +// Copyright John Maddock 2016. 
+// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type* in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_2(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ellint_e2_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + std::vector v1, v2; + + for(unsigned i = 0; i < ellint_e2_data.size(); ++i) + { + v1.push_back(ellint_e2_data[i][1]); + v2.push_back(ellint_e2_data[i][0]); + } + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" 
<< std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_2(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_ellint_2c_double.cu b/test/cuda/test_ellint_2c_double.cu new file mode 100644 index 0000000000..6f8dd25608 --- /dev/null +++ b/test/cuda/test_ellint_2c_double.cu @@ -0,0 +1,117 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_2(in[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ellint_e_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v; + + for(unsigned i = 0; i < ellint_e_data.size(); ++i) + v.push_back(ellint_e_data[i][0]); + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v.size(); + input_vector[i] = v[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = 
cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_2(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_ellint_3_double.cu b/test/cuda/test_ellint_3_double.cu new file mode 100644 index 0000000000..04fcca0b03 --- /dev/null +++ b/test/cuda/test_ellint_3_double.cu @@ -0,0 +1,120 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type* in2, const float_type* in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_3(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ellint_pi3_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ellint_pi3_data.size(); ++i) + { + v1.push_back(ellint_pi3_data[i][2]); + v2.push_back(ellint_pi3_data[i][0]); + v3.push_back(ellint_pi3_data[i][1]); + } + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << 
blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_3(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_ellint_3c_double.cu b/test/cuda/test_ellint_3c_double.cu new file mode 100644 index 0000000000..1c7928c9a0 --- /dev/null +++ b/test/cuda/test_ellint_3c_double.cu @@ -0,0 +1,117 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type* in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_3(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ellint_pi2_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + std::vector v1, v2; + + for(unsigned i = 0; i < ellint_pi2_data.size(); ++i) + { + v1.push_back(ellint_pi2_data[i][1]); + v2.push_back(ellint_pi2_data[i][0]); + } + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), 
output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_3(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_ellint_d_double.cu b/test/cuda/test_ellint_d_double.cu new file mode 100644 index 0000000000..b067ceefcf --- /dev/null +++ b/test/cuda/test_ellint_d_double.cu @@ -0,0 +1,122 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_d(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ellint_d2_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2; + + for(unsigned i = 0; i < ellint_d2_data.size(); ++i) + { + v1.push_back(ellint_d2_data[i][1]); + v2.push_back(ellint_d2_data[i][0]); + } + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + 
cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_d(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_ellint_dc_double.cu b/test/cuda/test_ellint_dc_double.cu new file mode 100644 index 0000000000..42ec1f28d0 --- /dev/null +++ b/test/cuda/test_ellint_dc_double.cu @@ -0,0 +1,117 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_d(in[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ellint_d_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v; + + for(unsigned i = 0; i < ellint_d_data.size(); ++i) + v.push_back(ellint_d_data[i][0]); + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v.size(); + input_vector[i] = v[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = 
cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_d(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_ellint_lambda_double.cu b/test/cuda/test_ellint_lambda_double.cu new file mode 100644 index 0000000000..a50ddbb17e --- /dev/null +++ b/test/cuda/test_ellint_lambda_double.cu @@ -0,0 +1,122 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::heuman_lambda(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../heuman_lambda_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2; + + for(unsigned i = 0; i < heuman_lambda_data.size(); ++i) + { + v1.push_back(heuman_lambda_data[i][1]); + v2.push_back(heuman_lambda_data[i][0]); + } + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 128; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + 
cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::heuman_lambda(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10000) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_ellint_rc_double.cu b/test/cuda/test_ellint_rc_double.cu new file mode 100644 index 0000000000..5b8a395216 --- /dev/null +++ b/test/cuda/test_ellint_rc_double.cu @@ -0,0 +1,117 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type* in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_rc(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ellint_rc_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + std::vector v1, v2; + + for(unsigned i = 0; i < ellint_rc_data.size(); ++i) + { + v1.push_back(ellint_rc_data[i][0]); + v2.push_back(ellint_rc_data[i][1]); + } + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), 
output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_rc(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_ellint_rd_double.cu b/test/cuda/test_ellint_rd_double.cu new file mode 100644 index 0000000000..334fe5339b --- /dev/null +++ b/test/cuda/test_ellint_rd_double.cu @@ -0,0 +1,120 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type* in2, const float_type* in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_rd(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ellint_rd_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ellint_rd_data.size(); ++i) + { + v1.push_back(ellint_rd_data[i][0]); + v2.push_back(ellint_rd_data[i][1]); + v3.push_back(ellint_rd_data[i][2]); + } + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << 
blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_rd(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_ellint_rf_double.cu b/test/cuda/test_ellint_rf_double.cu new file mode 100644 index 0000000000..19e7604ba8 --- /dev/null +++ b/test/cuda/test_ellint_rf_double.cu @@ -0,0 +1,120 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type* in2, const float_type* in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_rf(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ellint_rf_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ellint_rf_data.size(); ++i) + { + v1.push_back(ellint_rf_data[i][0]); + v2.push_back(ellint_rf_data[i][1]); + v3.push_back(ellint_rf_data[i][2]); + } + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << 
blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_rf(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_ellint_rg_double.cu b/test/cuda/test_ellint_rg_double.cu new file mode 100644 index 0000000000..107c719227 --- /dev/null +++ b/test/cuda/test_ellint_rg_double.cu @@ -0,0 +1,120 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type* in2, const float_type* in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_rg(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ellint_rg.ipp" + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ellint_rg.size(); ++i) + { + v1.push_back(ellint_rg[i][0]); + v2.push_back(ellint_rg[i][1]); + v3.push_back(ellint_rg[i][2]); + } + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " 
<< threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_rg(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_ellint_rj_double.cu b/test/cuda/test_ellint_rj_double.cu new file mode 100644 index 0000000000..d18c5ea6da --- /dev/null +++ b/test/cuda/test_ellint_rj_double.cu @@ -0,0 +1,124 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type* in2, const float_type* in3, const float_type* in4, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_rj(in1[i], in2[i], in3[i], in4[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ellint_rj_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + std::vector v1, v2, v3, v4; + + for(unsigned i = 0; i < ellint_rj_data.size(); ++i) + { + v1.push_back(ellint_rj_data[i][0]); + v2.push_back(ellint_rj_data[i][1]); + v3.push_back(ellint_rj_data[i][2]); + v4.push_back(ellint_rj_data[i][4]); + } + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + cuda_managed_ptr input_vector4(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + input_vector4[i] = v4[table_id]; + } + + // Launch the Vector Add CUDA Kernel + 
int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), input_vector4.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_rj(input_vector1[i], input_vector2[i], input_vector3[i], input_vector4[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 15) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error found was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_ellint_zeta_double.cu b/test/cuda/test_ellint_zeta_double.cu new file mode 100644 index 0000000000..dbab7a75aa --- /dev/null +++ b/test/cuda/test_ellint_zeta_double.cu @@ -0,0 +1,122 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::jacobi_zeta(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../jacobi_zeta_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2; + + for(unsigned i = 0; i < jacobi_zeta_data.size(); ++i) + { + v1.push_back(jacobi_zeta_data[i][1]); + v2.push_back(jacobi_zeta_data[i][0]); + } + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 128; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + 
cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::jacobi_zeta(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_erf_double.cu b/test/cuda/test_erf_double.cu new file mode 100644 index 0000000000..78dc132266 --- /dev/null +++ b/test/cuda/test_erf_double.cu @@ -0,0 +1,98 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::erf(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::erf(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(h_C[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_erf_float.cu b/test/cuda/test_erf_float.cu new file mode 100644 index 0000000000..c91175fdb7 --- /dev/null +++ b/test/cuda/test_erf_float.cu @@ -0,0 +1,99 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::erf(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr 
h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::erf(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(h_C[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED in " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_erf_inv_double.cu b/test/cuda/test_erf_inv_double.cu new file mode 100644 index 0000000000..26220f7291 --- /dev/null +++ b/test/cuda/test_erf_inv_double.cu @@ -0,0 +1,105 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::erf_inv(in[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) x + +#include "../erf_inv_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % erf_inv_data.size(); + input_vector[i] = erf_inv_data[table_id][0]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::erf_inv(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_erf_inv_float.cu b/test/cuda/test_erf_inv_float.cu new file mode 100644 index 0000000000..9e6356f36f --- /dev/null +++ b/test/cuda/test_erf_inv_float.cu @@ -0,0 +1,105 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::erf_inv(in[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) x + +#include "../erf_inv_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << 
std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % erf_inv_data.size(); + input_vector[i] = erf_inv_data[table_id][0]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::erf_inv(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_ex_val_cdf_double.cu b/test/cuda/test_ex_val_cdf_double.cu new file mode 100644 index 0000000000..db8ae2aace --- /dev/null +++ b/test/cuda/test_ex_val_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::extreme_value_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::extreme_value_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_ex_val_cdf_float.cu b/test/cuda/test_ex_val_cdf_float.cu new file mode 100644 index 0000000000..faa35e8155 --- /dev/null +++ b/test/cuda/test_ex_val_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::extreme_value_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::extreme_value_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_ex_val_pdf_double.cu b/test/cuda/test_ex_val_pdf_double.cu new file mode 100644 index 0000000000..bc1666c3d2 --- /dev/null +++ b/test/cuda/test_ex_val_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::extreme_value_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::extreme_value_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_ex_val_pdf_float.cu b/test/cuda/test_ex_val_pdf_float.cu new file mode 100644 index 0000000000..aa6baf659c --- /dev/null +++ b/test/cuda/test_ex_val_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::extreme_value_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::extreme_value_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_ex_val_quan_double.cu b/test/cuda/test_ex_val_quan_double.cu new file mode 100644 index 0000000000..f3d2cf946f --- /dev/null +++ b/test/cuda/test_ex_val_quan_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::extreme_value_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::extreme_value_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 3000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_ex_val_quan_float.cu b/test/cuda/test_ex_val_quan_float.cu new file mode 100644 index 0000000000..e622c9b291 --- /dev/null +++ b/test/cuda/test_ex_val_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::extreme_value_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::extreme_value_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 3000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_exp_cdf_double.cu b/test/cuda/test_exp_cdf_double.cu new file mode 100644 index 0000000000..55c4cc0bc8 --- /dev/null +++ b/test/cuda/test_exp_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::exponential_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::exponential_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_exp_cdf_float.cu b/test/cuda/test_exp_cdf_float.cu new file mode 100644 index 0000000000..682e7d8d24 --- /dev/null +++ b/test/cuda/test_exp_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::exponential_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::exponential_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_exp_pdf_double.cu b/test/cuda/test_exp_pdf_double.cu new file mode 100644 index 0000000000..8cddeae2f8 --- /dev/null +++ b/test/cuda/test_exp_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::exponential_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::exponential_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_exp_pdf_float.cu b/test/cuda/test_exp_pdf_float.cu new file mode 100644 index 0000000000..b2d63b4cab --- /dev/null +++ b/test/cuda/test_exp_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::exponential_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::exponential_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_exponential_quan_double.cu b/test/cuda/test_exponential_quan_double.cu new file mode 100644 index 0000000000..5f00a056c8 --- /dev/null +++ b/test/cuda/test_exponential_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::exponential_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::exponential_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_exponential_quan_float.cu b/test/cuda/test_exponential_quan_float.cu new file mode 100644 index 0000000000..7790e220d3 --- /dev/null +++ b/test/cuda/test_exponential_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::exponential_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::exponential_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_fpclassify_double.cu b/test/cuda/test_fpclassify_double.cu new file mode 100644 index 0000000000..880dfeb62d --- /dev/null +++ b/test/cuda/test_fpclassify_double.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, int *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::fpclassify(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + switch(i % 55) + { + case 1: + h_A[i] = 0; + break; + case 2: + h_A[i] = std::numeric_limits::infinity(); + break; + case 3: + h_A[i] = -std::numeric_limits::infinity(); + break; + case 4: + h_A[i] = std::numeric_limits::quiet_NaN(); + break; + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::fpclassify(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (h_C[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_gamma_cdf_double.cu b/test/cuda/test_gamma_cdf_double.cu new file mode 100644 index 0000000000..dd319ad35f --- /dev/null +++ b/test/cuda/test_gamma_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::gamma_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate 
the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::gamma_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_gamma_cdf_float.cu b/test/cuda/test_gamma_cdf_float.cu new file mode 100644 index 0000000000..9f3847aa79 --- /dev/null +++ b/test/cuda/test_gamma_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::gamma_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution 
dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::gamma_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_gamma_p_deriv_double.cu b/test/cuda/test_gamma_p_deriv_double.cu new file mode 100644 index 0000000000..30b2eac393 --- /dev/null +++ b/test/cuda/test_gamma_p_deriv_double.cu @@ -0,0 +1,124 @@ +// Copyright John Maddock 2016. 
+// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type * in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::gamma_p_derivative(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../igamma_med_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2; + + for(unsigned i = 0; i < igamma_med_data.size(); ++i) + { + v1.push_back(igamma_med_data[i][0]); + v2.push_back(igamma_med_data[i][1]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / 
threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::gamma_p_derivative(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_gamma_p_deriv_float.cu b/test/cuda/test_gamma_p_deriv_float.cu new file mode 100644 index 0000000000..2ba810bd6c --- /dev/null +++ b/test/cuda/test_gamma_p_deriv_float.cu @@ -0,0 +1,136 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type * in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::gamma_p_derivative(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../igamma_med_data.ipp" +#include "../igamma_big_data.ipp" +#include "../igamma_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2; + + for(unsigned i = 0; i < igamma_med_data.size(); ++i) + { + v1.push_back(igamma_med_data[i][0]); + v2.push_back(igamma_med_data[i][1]); + } + for(unsigned i = 0; i < igamma_big_data.size(); ++i) + { + v1.push_back(igamma_big_data[i][0]); + v2.push_back(igamma_big_data[i][1]); + } + for(unsigned i = 0; i < igamma_small_data.size(); ++i) + { + v1.push_back(igamma_small_data[i][0]); + v2.push_back(igamma_small_data[i][1]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < 
numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::gamma_p_derivative(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_gamma_p_double.cu b/test/cuda/test_gamma_p_double.cu new file mode 100644 index 0000000000..a243582fb8 --- /dev/null +++ b/test/cuda/test_gamma_p_double.cu @@ -0,0 +1,136 @@ +// Copyright John Maddock 2016. 
+// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type * in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::gamma_p(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../igamma_med_data.ipp" +#include "../igamma_big_data.ipp" +#include "../igamma_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2; + + for(unsigned i = 0; i < igamma_med_data.size(); ++i) + { + v1.push_back(igamma_med_data[i][0]); + v2.push_back(igamma_med_data[i][1]); + } + for(unsigned i = 0; i < igamma_big_data.size(); ++i) + { + v1.push_back(igamma_big_data[i][0]); + v2.push_back(igamma_big_data[i][1]); + } + for(unsigned i = 0; i < igamma_small_data.size(); ++i) + { + v1.push_back(igamma_small_data[i][0]); + v2.push_back(igamma_small_data[i][1]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr 
output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::gamma_p(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_gamma_p_float.cu b/test/cuda/test_gamma_p_float.cu new file mode 100644 index 0000000000..f9510c33df --- /dev/null +++ b/test/cuda/test_gamma_p_float.cu @@ -0,0 +1,136 @@ +// Copyright John Maddock 2016. 
+// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type * in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::gamma_p(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../igamma_med_data.ipp" +#include "../igamma_big_data.ipp" +#include "../igamma_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2; + + for(unsigned i = 0; i < igamma_med_data.size(); ++i) + { + v1.push_back(igamma_med_data[i][0]); + v2.push_back(igamma_med_data[i][1]); + } + for(unsigned i = 0; i < igamma_big_data.size(); ++i) + { + v1.push_back(igamma_big_data[i][0]); + v2.push_back(igamma_big_data[i][1]); + } + for(unsigned i = 0; i < igamma_small_data.size(); ++i) + { + v1.push_back(igamma_small_data[i][0]); + v2.push_back(igamma_small_data[i][1]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr 
output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::gamma_p(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_gamma_pdf_double.cu b/test/cuda/test_gamma_pdf_double.cu new file mode 100644 index 0000000000..f8d67d4974 --- /dev/null +++ b/test/cuda/test_gamma_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::gamma_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution 
dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::gamma_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_gamma_pdf_float.cu b/test/cuda/test_gamma_pdf_float.cu new file mode 100644 index 0000000000..205e8221e0 --- /dev/null +++ b/test/cuda/test_gamma_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. 
+// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::gamma_distribution(2), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) 
+ { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::gamma_distribution(2), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_geo_cdf_double.cu b/test/cuda/test_geo_cdf_double.cu new file mode 100644 index 0000000000..adc9de66bb --- /dev/null +++ b/test/cuda/test_geo_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::geometric_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::geometric_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_geo_cdf_float.cu b/test/cuda/test_geo_cdf_float.cu new file mode 100644 index 0000000000..eeac8f7774 --- /dev/null +++ b/test/cuda/test_geo_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::geometric_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::geometric_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_geo_pdf_double.cu b/test/cuda/test_geo_pdf_double.cu new file mode 100644 index 0000000000..6bd3da0f66 --- /dev/null +++ b/test/cuda/test_geo_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::geometric_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::geometric_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_geo_pdf_float.cu b/test/cuda/test_geo_pdf_float.cu new file mode 100644 index 0000000000..9160fc73a9 --- /dev/null +++ b/test/cuda/test_geo_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::geometric_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::geometric_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_geo_quan_double.cu b/test/cuda/test_geo_quan_double.cu new file mode 100644 index 0000000000..55e8d76934 --- /dev/null +++ b/test/cuda/test_geo_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::geometric_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::geometric_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_geo_quan_float.cu b/test/cuda/test_geo_quan_float.cu new file mode 100644 index 0000000000..e368406970 --- /dev/null +++ b/test/cuda/test_geo_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::geometric_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::geometric_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_ibeta_derivative_double.cu b/test/cuda/test_ibeta_derivative_double.cu new file mode 100644 index 0000000000..cdb6a7a8b9 --- /dev/null +++ b/test/cuda/test_ibeta_derivative_double.cu @@ -0,0 +1,148 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type * in2, const float_type* in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ibeta_derivative(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ibeta_derivative_data.ipp" +#include "../ibeta_derivative_int_data.ipp" +#include "../ibeta_derivative_large_data.ipp" +#include "../ibeta_derivative_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ibeta_derivative_data.size(); ++i) + { + v1.push_back(ibeta_derivative_data[i][0]); + v2.push_back(ibeta_derivative_data[i][1]); + v3.push_back(ibeta_derivative_data[i][2]); + } + for(unsigned i = 0; i < ibeta_derivative_int_data.size(); ++i) + { + v1.push_back(ibeta_derivative_int_data[i][0]); + v2.push_back(ibeta_derivative_int_data[i][1]); + v3.push_back(ibeta_derivative_int_data[i][2]); + } + for(unsigned i = 0; i < ibeta_derivative_large_data.size(); ++i) + { + v1.push_back(ibeta_derivative_large_data[i][0]); + v2.push_back(ibeta_derivative_large_data[i][1]); + v3.push_back(ibeta_derivative_large_data[i][2]); + } + for(unsigned i = 0; i < ibeta_derivative_small_data.size(); ++i) + { + v1.push_back(ibeta_derivative_small_data[i][0]); + v2.push_back(ibeta_derivative_small_data[i][1]); + v3.push_back(ibeta_derivative_small_data[i][2]); 
+ } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ibeta_derivative(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_ibeta_derivative_float.cu b/test/cuda/test_ibeta_derivative_float.cu new file mode 100644 index 0000000000..9ef7d7b6e1 --- /dev/null +++ b/test/cuda/test_ibeta_derivative_float.cu @@ -0,0 +1,151 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type * in2, const float_type* in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ibeta_derivative(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../ibeta_derivative_data.ipp" +#include "../ibeta_derivative_int_data.ipp" +/* +#include "../ibeta_derivative_large_data.ipp" +*/ +#include "../ibeta_derivative_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ibeta_derivative_data.size(); ++i) + { + 
v1.push_back(ibeta_derivative_data[i][0]); + v2.push_back(ibeta_derivative_data[i][1]); + v3.push_back(ibeta_derivative_data[i][2]); + } + for(unsigned i = 0; i < ibeta_derivative_int_data.size(); ++i) + { + v1.push_back(ibeta_derivative_int_data[i][0]); + v2.push_back(ibeta_derivative_int_data[i][1]); + v3.push_back(ibeta_derivative_int_data[i][2]); + } + /* + for(unsigned i = 0; i < ibeta_derivative_large_data.size(); ++i) + { + v1.push_back(ibeta_derivative_large_data[i][0]); + v2.push_back(ibeta_derivative_large_data[i][1]); + v3.push_back(ibeta_derivative_large_data[i][2]); + }*/ + for(unsigned i = 0; i < ibeta_derivative_small_data.size(); ++i) + { + v1.push_back(ibeta_derivative_small_data[i][0]); + v2.push_back(ibeta_derivative_small_data[i][1]); + v3.push_back(ibeta_derivative_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + 
cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ibeta_derivative(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_inv_chi_sq_cdf_double.cu b/test/cuda/test_inv_chi_sq_cdf_double.cu new file mode 100644 index 0000000000..cac6cbd9cc --- /dev/null +++ b/test/cuda/test_inv_chi_sq_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_chi_squared_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::inverse_chi_squared_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_inv_chi_sq_cdf_float.cu b/test/cuda/test_inv_chi_sq_cdf_float.cu new file mode 100644 index 0000000000..d720eb7a47 --- /dev/null +++ b/test/cuda/test_inv_chi_sq_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_chi_squared_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::inverse_chi_squared_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_inv_chi_sq_pdf_double.cu b/test/cuda/test_inv_chi_sq_pdf_double.cu new file mode 100644 index 0000000000..0bb40af3af --- /dev/null +++ b/test/cuda/test_inv_chi_sq_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_chi_squared_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::inverse_chi_squared_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_inv_chi_sq_pdf_float.cu b/test/cuda/test_inv_chi_sq_pdf_float.cu new file mode 100644 index 0000000000..95b6868d92 --- /dev/null +++ b/test/cuda/test_inv_chi_sq_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_chi_squared_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::inverse_chi_squared_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_inv_gam_cdf_double.cu b/test/cuda/test_inv_gam_cdf_double.cu new file mode 100644 index 0000000000..847675252f --- /dev/null +++ b/test/cuda/test_inv_gam_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_gamma_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::inverse_gamma_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_inv_gam_cdf_float.cu b/test/cuda/test_inv_gam_cdf_float.cu new file mode 100644 index 0000000000..5f11a6f8ed --- /dev/null +++ b/test/cuda/test_inv_gam_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_gamma_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::inverse_gamma_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_inv_gam_pdf_double.cu b/test/cuda/test_inv_gam_pdf_double.cu new file mode 100644 index 0000000000..77020cfffb --- /dev/null +++ b/test/cuda/test_inv_gam_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_gamma_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::inverse_gamma_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_inv_gam_pdf_float.cu b/test/cuda/test_inv_gam_pdf_float.cu new file mode 100644 index 0000000000..b6fbe1838e --- /dev/null +++ b/test/cuda/test_inv_gam_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_gamma_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::inverse_gamma_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_inv_gaus_cdf_double.cu b/test/cuda/test_inv_gaus_cdf_double.cu new file mode 100644 index 0000000000..68b5f2f262 --- /dev/null +++ b/test/cuda/test_inv_gaus_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_gaussian_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::inverse_gaussian_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_inv_gaus_cdf_float.cu b/test/cuda/test_inv_gaus_cdf_float.cu new file mode 100644 index 0000000000..d498aeeec7 --- /dev/null +++ b/test/cuda/test_inv_gaus_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_gaussian_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::inverse_gaussian_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_inv_gaus_pdf_double.cu b/test/cuda/test_inv_gaus_pdf_double.cu new file mode 100644 index 0000000000..404d569e25 --- /dev/null +++ b/test/cuda/test_inv_gaus_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_gaussian_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::inverse_gaussian_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_inv_gaus_pdf_float.cu b/test/cuda/test_inv_gaus_pdf_float.cu new file mode 100644 index 0000000000..4e319d66ae --- /dev/null +++ b/test/cuda/test_inv_gaus_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_gaussian_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::inverse_gaussian_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_isfinite_double.cu b/test/cuda/test_isfinite_double.cu new file mode 100644 index 0000000000..96673cf86a --- /dev/null +++ b/test/cuda/test_isfinite_double.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, bool *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::isfinite(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + switch(i % 55) + { + case 1: + h_A[i] = 0; + break; + case 2: + h_A[i] = std::numeric_limits::infinity(); + break; + case 3: + h_A[i] = -std::numeric_limits::infinity(); + break; + case 4: + h_A[i] = std::numeric_limits::quiet_NaN(); + break; + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::isfinite(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (h_C[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_isinf_double.cu b/test/cuda/test_isinf_double.cu new file mode 100644 index 0000000000..1f9358dbc9 --- /dev/null +++ b/test/cuda/test_isinf_double.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, bool *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::isinf(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr 
h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + switch(i % 55) + { + case 1: + h_A[i] = 0; + break; + case 2: + h_A[i] = std::numeric_limits::infinity(); + break; + case 3: + h_A[i] = -std::numeric_limits::infinity(); + break; + case 4: + h_A[i] = std::numeric_limits::quiet_NaN(); + break; + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::isinf(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (h_C[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_isnan_double.cu b/test/cuda/test_isnan_double.cu new file mode 100644 index 0000000000..149307e327 --- /dev/null +++ b/test/cuda/test_isnan_double.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, bool *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::isnan(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + switch(i % 55) + { + case 1: + h_A[i] = 0; + break; + case 2: + h_A[i] = std::numeric_limits::infinity(); + break; + case 3: + h_A[i] = -std::numeric_limits::infinity(); + break; + case 4: + h_A[i] = std::numeric_limits::quiet_NaN(); + break; + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::isnan(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (h_C[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_isnormal_double.cu b/test/cuda/test_isnormal_double.cu new file mode 100644 index 0000000000..8a5ef262a2 --- /dev/null +++ b/test/cuda/test_isnormal_double.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, bool *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::isnormal(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + 
cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + switch(i % 55) + { + case 1: + h_A[i] = 0; + break; + case 2: + h_A[i] = std::numeric_limits::infinity(); + break; + case 3: + h_A[i] = -std::numeric_limits::infinity(); + break; + case 4: + h_A[i] = std::numeric_limits::quiet_NaN(); + break; + } + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::isnormal(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (h_C[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_laplace_cdf_double.cu b/test/cuda/test_laplace_cdf_double.cu new file mode 100644 index 0000000000..3e3f013a2e --- /dev/null +++ b/test/cuda/test_laplace_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::laplace_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::laplace_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_laplace_cdf_float.cu b/test/cuda/test_laplace_cdf_float.cu new file mode 100644 index 0000000000..3478d53523 --- /dev/null +++ b/test/cuda/test_laplace_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::laplace_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::laplace_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_laplace_pdf_double.cu b/test/cuda/test_laplace_pdf_double.cu new file mode 100644 index 0000000000..f65fbb0d60 --- /dev/null +++ b/test/cuda/test_laplace_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::laplace_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::laplace_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_laplace_pdf_float.cu b/test/cuda/test_laplace_pdf_float.cu new file mode 100644 index 0000000000..c4d52380c1 --- /dev/null +++ b/test/cuda/test_laplace_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::laplace_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::laplace_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_laplace_quan_double.cu b/test/cuda/test_laplace_quan_double.cu new file mode 100644 index 0000000000..c22f53d895 --- /dev/null +++ b/test/cuda/test_laplace_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::laplace_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::laplace_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_laplace_quan_float.cu b/test/cuda/test_laplace_quan_float.cu new file mode 100644 index 0000000000..0133d6d7f6 --- /dev/null +++ b/test/cuda/test_laplace_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::laplace_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::laplace_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_lgamma_double.cu b/test/cuda/test_lgamma_double.cu new file mode 100644 index 0000000000..d1ddf645ee --- /dev/null +++ b/test/cuda/test_lgamma_double.cu @@ -0,0 +1,128 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = ::lgamma(in[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../test_gamma_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v; + + for(unsigned i = 0; i < factorials.size(); ++i) + v.push_back(factorials[i][0]); + for(unsigned i = 0; i < near_1.size(); ++i) + v.push_back(near_1[i][0]); + for(unsigned i = 0; i < near_2.size(); ++i) + v.push_back(near_2[i][0]); + for(unsigned i = 0; i < near_0.size(); ++i) + v.push_back(near_0[i][0]); + for(unsigned i = 0; i < near_m10.size(); ++i) + v.push_back(near_m10[i][0]); + for(unsigned i = 0; i < near_m55.size(); ++i) + v.push_back(near_m55[i][0]); + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v.size(); + input_vector[i] = v[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int 
threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::lgamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_lgamma_float.cu b/test/cuda/test_lgamma_float.cu new file mode 100644 index 0000000000..a038d200d6 --- /dev/null +++ b/test/cuda/test_lgamma_float.cu @@ -0,0 +1,128 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::lgamma(in[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../test_gamma_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v; + + for(unsigned i = 0; i < factorials.size(); ++i) + v.push_back(factorials[i][0]); + for(unsigned i = 0; i < near_1.size(); ++i) + v.push_back(near_1[i][0]); + for(unsigned i = 0; i < near_2.size(); ++i) + v.push_back(near_2[i][0]); + for(unsigned i = 0; i < near_0.size(); ++i) + v.push_back(near_0[i][0]); + for(unsigned i = 0; i < near_m10.size(); ++i) + v.push_back(near_m10[i][0]); + for(unsigned i = 0; i < near_m55.size(); ++i) + v.push_back(near_m55[i][0]); + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v.size(); + input_vector[i] = v[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int 
threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::lgamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_logistic_cdf_double.cu b/test/cuda/test_logistic_cdf_double.cu new file mode 100644 index 0000000000..0fb31cc25a --- /dev/null +++ b/test/cuda/test_logistic_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::logistic_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::logistic_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_logistic_cdf_float.cu b/test/cuda/test_logistic_cdf_float.cu new file mode 100644 index 0000000000..9250121f05 --- /dev/null +++ b/test/cuda/test_logistic_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::logistic_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::logistic_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_logistic_pdf_double.cu b/test/cuda/test_logistic_pdf_double.cu new file mode 100644 index 0000000000..14aca5110b --- /dev/null +++ b/test/cuda/test_logistic_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::logistic_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::logistic_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_logistic_pdf_float.cu b/test/cuda/test_logistic_pdf_float.cu new file mode 100644 index 0000000000..03c4965ce0 --- /dev/null +++ b/test/cuda/test_logistic_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::logistic_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::logistic_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_logistic_quan_double.cu b/test/cuda/test_logistic_quan_double.cu new file mode 100644 index 0000000000..002cb3c3e2 --- /dev/null +++ b/test/cuda/test_logistic_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::logistic_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::logistic_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_logistic_quan_float.cu b/test/cuda/test_logistic_quan_float.cu new file mode 100644 index 0000000000..a8e7d6e37c --- /dev/null +++ b/test/cuda/test_logistic_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::logistic_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::logistic_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_lognorm_cdf_double.cu b/test/cuda/test_lognorm_cdf_double.cu new file mode 100644 index 0000000000..06e7a2bee2 --- /dev/null +++ b/test/cuda/test_lognorm_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::lognormal_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::lognormal_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_lognorm_cdf_float.cu b/test/cuda/test_lognorm_cdf_float.cu new file mode 100644 index 0000000000..6ee85de268 --- /dev/null +++ b/test/cuda/test_lognorm_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::lognormal_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::lognormal_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_lognorm_pdf_double.cu b/test/cuda/test_lognorm_pdf_double.cu new file mode 100644 index 0000000000..2eaf6f9923 --- /dev/null +++ b/test/cuda/test_lognorm_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::lognormal_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << 
cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::lognormal_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_lognorm_pdf_float.cu b/test/cuda/test_lognorm_pdf_float.cu new file mode 100644 index 0000000000..b5574b0f1f --- /dev/null +++ b/test/cuda/test_lognorm_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::lognormal_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::lognormal_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_lognorm_quan_double.cu b/test/cuda/test_lognorm_quan_double.cu new file mode 100644 index 0000000000..014fdd53cf --- /dev/null +++ b/test/cuda/test_lognorm_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::lognormal_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::lognormal_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_lognorm_quan_float.cu b/test/cuda/test_lognorm_quan_float.cu new file mode 100644 index 0000000000..88069c2439 --- /dev/null +++ b/test/cuda/test_lognorm_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::lognormal_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::lognormal_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_modf_double.cu b/test/cuda/test_modf_double.cu new file mode 100644 index 0000000000..0a99316011 --- /dev/null +++ b/test/cuda/test_modf_double.cu @@ -0,0 +1,104 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + float_type fract; + int i_part; + long l_part; + long long ll_part; + + if (i < numElements) + { + out[i] = boost::math::modf(in[i], &fract) + boost::math::modf(in[i], &i_part) + boost::math::modf(in[i], &l_part) + boost::math::modf(in[i], &ll_part); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + float_type fract; + for(int i = 0; i < numElements; ++i) + results.push_back(4 * boost::math::modf(h_A[i], &fract)); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(h_C[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_normal_cdf_double.cu b/test/cuda/test_normal_cdf_double.cu new file mode 100644 index 0000000000..f1d2fe0334 --- /dev/null +++ b/test/cuda/test_normal_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::normal_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::normal_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_normal_cdf_float.cu b/test/cuda/test_normal_cdf_float.cu new file mode 100644 index 0000000000..dc1485ab8f --- /dev/null +++ b/test/cuda/test_normal_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::normal_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << 
")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::normal_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_normal_pdf_double.cu b/test/cuda/test_normal_pdf_double.cu new file mode 100644 index 0000000000..ea83e2ac91 --- /dev/null +++ b/test/cuda/test_normal_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::normal_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::normal_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_normal_pdf_float.cu b/test/cuda/test_normal_pdf_float.cu new file mode 100644 index 0000000000..5738b43b20 --- /dev/null +++ b/test/cuda/test_normal_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::normal_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << 
")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::normal_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_normal_quan_double.cu b/test/cuda/test_normal_quan_double.cu new file mode 100644 index 0000000000..2f01bfe513 --- /dev/null +++ b/test/cuda/test_normal_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::normal_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::normal_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_normal_quan_float.cu b/test/cuda/test_normal_quan_float.cu new file mode 100644 index 0000000000..f8de25f4b0 --- /dev/null +++ b/test/cuda/test_normal_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::normal_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::normal_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_pareto_cdf_double.cu b/test/cuda/test_pareto_cdf_double.cu new file mode 100644 index 0000000000..4908095d7c --- /dev/null +++ b/test/cuda/test_pareto_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::pareto_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::pareto_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_pareto_cdf_float.cu b/test/cuda/test_pareto_cdf_float.cu new file mode 100644 index 0000000000..0d1709ba28 --- /dev/null +++ b/test/cuda/test_pareto_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::pareto_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << 
")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::pareto_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_pareto_pdf_double.cu b/test/cuda/test_pareto_pdf_double.cu new file mode 100644 index 0000000000..fb1e8526a6 --- /dev/null +++ b/test/cuda/test_pareto_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::pareto_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::pareto_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_pareto_pdf_float.cu b/test/cuda/test_pareto_pdf_float.cu new file mode 100644 index 0000000000..4ea1df02c9 --- /dev/null +++ b/test/cuda/test_pareto_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::pareto_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << 
")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::pareto_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_pareto_quan_double.cu b/test/cuda/test_pareto_quan_double.cu new file mode 100644 index 0000000000..f336a3884e --- /dev/null +++ b/test/cuda/test_pareto_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::pareto_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::pareto_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_pareto_quan_float.cu b/test/cuda/test_pareto_quan_float.cu new file mode 100644 index 0000000000..efe644f592 --- /dev/null +++ b/test/cuda/test_pareto_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::pareto_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::pareto_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_poisson_cdf_double.cu b/test/cuda/test_poisson_cdf_double.cu new file mode 100644 index 0000000000..957878c318 --- /dev/null +++ b/test/cuda/test_poisson_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::poisson_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::poisson_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_poisson_cdf_float.cu b/test/cuda/test_poisson_cdf_float.cu new file mode 100644 index 0000000000..ff91fc8939 --- /dev/null +++ b/test/cuda/test_poisson_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::poisson_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::poisson_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_poisson_pdf_double.cu b/test/cuda/test_poisson_pdf_double.cu new file mode 100644 index 0000000000..5322467f38 --- /dev/null +++ b/test/cuda/test_poisson_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::poisson_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::poisson_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_poisson_pdf_float.cu b/test/cuda/test_poisson_pdf_float.cu new file mode 100644 index 0000000000..bc6b50f3c6 --- /dev/null +++ b/test/cuda/test_poisson_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::poisson_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::poisson_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_rayleigh_cdf_double.cu b/test/cuda/test_rayleigh_cdf_double.cu new file mode 100644 index 0000000000..624c3b9620 --- /dev/null +++ b/test/cuda/test_rayleigh_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::rayleigh_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::rayleigh_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_rayleigh_cdf_float.cu b/test/cuda/test_rayleigh_cdf_float.cu new file mode 100644 index 0000000000..49eb228228 --- /dev/null +++ b/test/cuda/test_rayleigh_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::rayleigh_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::rayleigh_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_rayleigh_pdf_double.cu b/test/cuda/test_rayleigh_pdf_double.cu new file mode 100644 index 0000000000..2c01a45d02 --- /dev/null +++ b/test/cuda/test_rayleigh_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::rayleigh_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::rayleigh_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_rayleigh_pdf_float.cu b/test/cuda/test_rayleigh_pdf_float.cu new file mode 100644 index 0000000000..2a2fccc8f2 --- /dev/null +++ b/test/cuda/test_rayleigh_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::rayleigh_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::rayleigh_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_rayleigh_quan_double.cu b/test/cuda/test_rayleigh_quan_double.cu new file mode 100644 index 0000000000..bbb13da04e --- /dev/null +++ b/test/cuda/test_rayleigh_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::rayleigh_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::rayleigh_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_rayleigh_quan_float.cu b/test/cuda/test_rayleigh_quan_float.cu new file mode 100644 index 0000000000..0e84b6a844 --- /dev/null +++ b/test/cuda/test_rayleigh_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::rayleigh_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::rayleigh_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_round_double.cu b/test/cuda/test_round_double.cu new file mode 100644 index 0000000000..b72f977a21 --- /dev/null +++ b/test/cuda/test_round_double.cu @@ -0,0 +1,98 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::round(in[i]) + boost::math::iround(in[i]) + boost::math::lround(in[i]) + boost::math::llround(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(4 * boost::math::round(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(h_C[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_sign_double.cu b/test/cuda/test_sign_double.cu new file mode 100644 index 0000000000..0266171239 --- /dev/null +++ b/test/cuda/test_sign_double.cu @@ -0,0 +1,115 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, int *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::sign(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr 
<int> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+            h_A[i] = 0;
+            break;
+        case 2:
+            h_A[i] = std::numeric_limits<float_type>::infinity();
+            break;
+        case 3:
+            h_A[i] = -std::numeric_limits<float_type>::infinity();
+            break;
+        case 4:
+            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
+            break;
+        }
+        if(i % 1)
+            h_A[i] = -h_A[i];
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<int> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::sign(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
+
diff --git a/test/cuda/test_signbit_double.cu b/test/cuda/test_signbit_double.cu
new file mode 100644
index 0000000000..8f82825175
--- /dev/null
+++ b/test/cuda/test_signbit_double.cu
@@ -0,0 +1,115 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, int *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::signbit(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + switch(i % 55) + { + case 1: + h_A[i] = 0; + break; + case 2: + h_A[i] = std::numeric_limits::infinity(); + break; + case 3: + h_A[i] = -std::numeric_limits::infinity(); + break; + case 4: + h_A[i] = std::numeric_limits::quiet_NaN(); + break; + } + if(i % 1) + h_A[i] = -h_A[i]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel 
(error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::signbit(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (h_C[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_std_erf_double.cu b/test/cuda/test_std_erf_double.cu new file mode 100644 index 0000000000..2a4427d834 --- /dev/null +++ b/test/cuda/test_std_erf_double.cu @@ -0,0 +1,99 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = ::erf(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + 
cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + double t = w.elapsed(); + std::cout << "CUDA kernal done in " << t << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::erf(h_A[i])); + t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(h_C[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_tgamma_double.cu b/test/cuda/test_tgamma_double.cu new file mode 100644 index 0000000000..9d34f81c68 --- /dev/null +++ b/test/cuda/test_tgamma_double.cu @@ -0,0 +1,128 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::tgamma(in[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../test_gamma_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v; + + for(unsigned i = 0; i < factorials.size(); ++i) + v.push_back(factorials[i][0]); + for(unsigned i = 0; i < near_1.size(); ++i) + v.push_back(near_1[i][0]); + for(unsigned i = 0; i < near_2.size(); ++i) + v.push_back(near_2[i][0]); + for(unsigned i = 0; i < near_0.size(); ++i) + v.push_back(near_0[i][0]); + for(unsigned i = 0; i < near_m10.size(); ++i) + v.push_back(near_m10[i][0]); + for(unsigned i = 0; i < near_m55.size(); ++i) + v.push_back(near_m55[i][0]); + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v.size(); + input_vector[i] = v[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int 
threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::tgamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_tgamma_float.cu b/test/cuda/test_tgamma_float.cu new file mode 100644 index 0000000000..49804ef439 --- /dev/null +++ b/test/cuda/test_tgamma_float.cu @@ -0,0 +1,128 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::tgamma(in[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "../test_gamma_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v; + + for(unsigned i = 0; i < factorials.size(); ++i) + v.push_back(factorials[i][0]); + for(unsigned i = 0; i < near_1.size(); ++i) + v.push_back(near_1[i][0]); + for(unsigned i = 0; i < near_2.size(); ++i) + v.push_back(near_2[i][0]); + for(unsigned i = 0; i < near_0.size(); ++i) + v.push_back(near_0[i][0]); + for(unsigned i = 0; i < near_m10.size(); ++i) + v.push_back(near_m10[i][0]); + for(unsigned i = 0; i < near_m55.size(); ++i) + v.push_back(near_m55[i][0]); + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v.size(); + input_vector[i] = v[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int 
threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::tgamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_triangle_cdf_double.cu b/test/cuda/test_triangle_cdf_double.cu new file mode 100644 index 0000000000..9c71b02d0b --- /dev/null +++ b/test/cuda/test_triangle_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::triangular_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::triangular_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_triangle_cdf_float.cu b/test/cuda/test_triangle_cdf_float.cu new file mode 100644 index 0000000000..50bd41cb8d --- /dev/null +++ b/test/cuda/test_triangle_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::triangular_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::triangular_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_triangle_pdf_double.cu b/test/cuda/test_triangle_pdf_double.cu new file mode 100644 index 0000000000..aff61e7d2d --- /dev/null +++ b/test/cuda/test_triangle_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::triangular_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::triangular_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_triangle_pdf_float.cu b/test/cuda/test_triangle_pdf_float.cu new file mode 100644 index 0000000000..6fbc9ba5d1 --- /dev/null +++ b/test/cuda/test_triangle_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::triangular_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::triangular_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_triangle_quan_double.cu b/test/cuda/test_triangle_quan_double.cu new file mode 100644 index 0000000000..1880ed2a3a --- /dev/null +++ b/test/cuda/test_triangle_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::triangular_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::triangular_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_triangle_quan_float.cu b/test/cuda/test_triangle_quan_float.cu new file mode 100644 index 0000000000..8d0e701cfb --- /dev/null +++ b/test/cuda/test_triangle_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::triangular_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::triangular_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_trunc_double.cu b/test/cuda/test_trunc_double.cu new file mode 100644 index 0000000000..6d0bfde217 --- /dev/null +++ b/test/cuda/test_trunc_double.cu @@ -0,0 +1,98 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::trunc(in[i]) + boost::math::itrunc(in[i]) + boost::math::ltrunc(in[i]) + boost::math::lltrunc(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(4 * boost::math::trunc(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(h_C[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} + diff --git a/test/cuda/test_uniform_cdf_double.cu b/test/cuda/test_uniform_cdf_double.cu new file mode 100644 index 0000000000..c7c411914c --- /dev/null +++ b/test/cuda/test_uniform_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::uniform_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // 
Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::uniform_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_uniform_cdf_float.cu b/test/cuda/test_uniform_cdf_float.cu new file mode 100644 index 0000000000..9ed322941d --- /dev/null +++ b/test/cuda/test_uniform_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::uniform_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution 
dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::uniform_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_uniform_pdf_double.cu b/test/cuda/test_uniform_pdf_double.cu new file mode 100644 index 0000000000..dab9a47500 --- /dev/null +++ b/test/cuda/test_uniform_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. 
+// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::uniform_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != 
cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::uniform_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_uniform_pdf_float.cu b/test/cuda/test_uniform_pdf_float.cu new file mode 100644 index 0000000000..01c835003c --- /dev/null +++ b/test/cuda/test_uniform_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::uniform_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << 
")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::uniform_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_uniform_quan_double.cu b/test/cuda/test_uniform_quan_double.cu new file mode 100644 index 0000000000..3925ed3d36 --- /dev/null +++ b/test/cuda/test_uniform_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::uniform_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::uniform_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_uniform_quan_float.cu b/test/cuda/test_uniform_quan_float.cu new file mode 100644 index 0000000000..37dda9e76a --- /dev/null +++ b/test/cuda/test_uniform_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::uniform_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::uniform_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_weibull_cdf_double.cu b/test/cuda/test_weibull_cdf_double.cu new file mode 100644 index 0000000000..c8b040db86 --- /dev/null +++ b/test/cuda/test_weibull_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::weibull_distribution(1.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::weibull_distribution(1.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_weibull_cdf_float.cu b/test/cuda/test_weibull_cdf_float.cu new file mode 100644 index 0000000000..c7524a6c86 --- /dev/null +++ b/test/cuda/test_weibull_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::weibull_distribution(1.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::weibull_distribution(1.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_weibull_pdf_double.cu b/test/cuda/test_weibull_pdf_double.cu new file mode 100644 index 0000000000..3d310ff839 --- /dev/null +++ b/test/cuda/test_weibull_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::weibull_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::weibull_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_weibull_pdf_float.cu b/test/cuda/test_weibull_pdf_float.cu new file mode 100644 index 0000000000..8910f4dea1 --- /dev/null +++ b/test/cuda/test_weibull_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::weibull_distribution(1.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) 
<< ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::weibull_distribution(1.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_weibull_quan_double.cu b/test/cuda/test_weibull_quan_double.cu new file mode 100644 index 0000000000..3757b640bc --- /dev/null +++ b/test/cuda/test_weibull_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::weibull_distribution(1.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::weibull_distribution(1.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/test_weibull_quan_float.cu b/test/cuda/test_weibull_quan_float.cu new file mode 100644 index 0000000000..7968f1475a --- /dev/null +++ b/test/cuda/test_weibull_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::weibull_distribution(1.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::weibull_distribution(1.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} +