diff --git a/doc/math.qbk b/doc/math.qbk index 711e88f8c6..3d184ad8d3 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -552,6 +552,13 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22. [include distributions/dist_reference.qbk] [/includes all individual distribution.qbk files] [endmathpart] [/section:dist Statistical Distributions and Functions] +[mathpart vector_functionals Vector Functionals] +[include vector_functionals/univariate_statistics.qbk] +[include vector_functionals/bivariate_statistics.qbk] +[include vector_functionals/signal_statistics.qbk] +[include vector_functionals/norms.qbk] +[endmathpart] [/section:vector_functionals Vector Functionals] + [mathpart special Special Functions] [include sf/number_series.qbk] diff --git a/doc/roots/roots.qbk b/doc/roots/roots.qbk index e5df37b05a..8880ba2cba 100644 --- a/doc/roots/roots.qbk +++ b/doc/roots/roots.qbk @@ -32,6 +32,9 @@ template Complex complex_newton(F f, Complex guess, int max_iterations = std::numeric_limits::digits); + template + auto quadratic_roots(T const & a, T const & b, T const & c); + }}} // namespaces boost::math::tools. [h4 Description] @@ -45,6 +48,8 @@ __halley and __schroder iteration. * `complex_newton` performs Newton's method on complex-analytic functions. +* `solve_quadratic` solves quadratic equations using various tricks to keep catastrophic cancellation from occurring in computation of the discriminant. + [variablelist Parameters of the real-valued root finding functions [[F f] [Type F must be a callable function object (or C++ lambda) that accepts one parameter and @@ -173,6 +178,25 @@ Finally, the derivative of /f/ must be continuous at the root or else non-roots An example usage of `complex_newton` is given in `examples/daubechies_coefficients.cpp`. 
+[h4 Quadratics] + +To solve a quadratic /ax/[super 2] + /bx/ + /c/ = 0, we may use + + auto [x0, x1] = boost::math::tools::quadratic_roots(a, b, c); + +If the roots are real, they are arranged so that `x0` \u2264 `x1`. +If the roots are complex and the inputs are real, `x0` and `x1` are both `std::numeric_limits::quiet_NaN()`. +In this case we must cast `a`, `b` and `c` to a complex type to extract the complex roots. +If `a`, `b` and `c` are integral, then the roots are of type double. +The routine is much faster if the fused-multiply-add instruction is available on your architecture. +If the fma is not available, the function resorts to slow emulation. +Finally, speed is improved if you compile for your particular architecture. +For instance, if you compile without any architecture flags, then the `std::fma` call compiles down to `call _fma`, +which dynamically chooses to emulate or execute the `vfmadd132sd` instruction based on the capabilities of the architecture. +If instead, you compile with (say) `-march=native` then no dynamic choice is made: +The `vfmadd132sd` instruction is always executed if available and emulation is used if not. + + [h4 Examples] See __root_finding_examples. diff --git a/doc/vector_functionals/bivariate_statistics.qbk b/doc/vector_functionals/bivariate_statistics.qbk new file mode 100644 index 0000000000..23b4a9ecff --- /dev/null +++ b/doc/vector_functionals/bivariate_statistics.qbk @@ -0,0 +1,75 @@ +[/ + Copyright 2018 Nick Thompson + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt). 
+] + +[section:bivariate_statistics Bivariate Statistics] + +[heading Synopsis] + +`` +#include + +namespace boost{ namespace math{ namespace tools { + + template + auto covariance(Container const & u, Container const & v); + + template + auto means_and_covariance(Container const & u, Container const & v); + + template + auto correlation_coefficient(Container const & u, Container const & v); + +}}} +`` + +[heading Description] + +This file provides functions for computing bivariate statistics. + +[heading Covariance] + +Computes the population covariance of two datasets: + + std::vector u{1,2,3,4,5}; + std::vector v{1,2,3,4,5}; + double cov_uv = boost::math::tools::covariance(u, v); + +The implementation follows [@https://doi.org/10.1109/CLUSTR.2009.5289161 Bennet et al]. +The data is not modified. Requires a random-access container. +Works with real-valued inputs and does not work with complex-valued inputs. + +The algorithm used herein simultaneously generates the mean values of the input data /u/ and /v/. +For certain applications, it might be useful to get them in a single pass through the data. +As such, we provide `means_and_covariance`: + + std::vector u{1,2,3,4,5}; + std::vector v{1,2,3,4,5}; + auto [mu_u, mu_v, cov_uv] = boost::math::tools::means_and_covariance(u, v); + +[heading Correlation Coefficient] + +Computes the [@https://en.wikipedia.org/wiki/Pearson_correlation_coefficient Pearson correlation coefficient] of two datasets /u/ and /v/: + + std::vector u{1,2,3,4,5}; + std::vector v{1,2,3,4,5}; + double rho_uv = boost::math::tools::correlation_coefficient(u, v); + // rho_uv = 1. + +The data must be random access and cannot be complex. + +If one or both of the datasets is constant, the correlation coefficient is an indeterminant form (0/0) and definitions must be introduced to assign it a value. +We use the following: If both datasets are constant, then the correlation coefficient is 1. 
+If one dataset is constant, and the other is not, then the correlation coefficient is zero. + + +[heading References] + +* Bennett, Janine, et al. ['Numerically stable, single-pass, parallel statistics algorithms.] Cluster Computing and Workshops, 2009. CLUSTER'09. IEEE International Conference on. IEEE, 2009. + +[endsect] +[/section:bivariate_statistics Bivariate Statistics] diff --git a/doc/vector_functionals/norms.qbk b/doc/vector_functionals/norms.qbk new file mode 100644 index 0000000000..b809540b16 --- /dev/null +++ b/doc/vector_functionals/norms.qbk @@ -0,0 +1,259 @@ +[/ + Copyright 2018 Nick Thompson + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt). +] + +[section:norms Norms] + +[heading Synopsis] + +`` +#include + +namespace boost{ namespace math{ namespace tools { + + template + auto l0_pseudo_norm(Container const & c); + + template + auto l0_pseudo_norm(ForwardIterator first, ForwardIterator last); + + template + size_t hamming_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2); + + template + size_t hamming_distance(Container const & u, Container const & v); + + template + auto l1_norm(Container const & c); + + template + auto l1_norm(ForwardIterator first, ForwardIterator last); + + template + auto l1_distance(Container const & v1, Container const & v2); + + template + auto l1_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2); + + template + auto l2_norm(Container const & c); + + template + auto l2_norm(ForwardIterator first, ForwardIterator last); + + template + auto l2_distance(Container const & v1, Container const & v2); + + template + auto l2_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2); + + template + auto sup_norm(Container const & c); + + template + auto sup_norm(ForwardIterator first, ForwardIterator last); + + template + auto 
sup_distance(Container const & v1, Container const & v2); + + template + auto sup_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2); + + template + auto lp_norm(Container const & c, unsigned p); + + template + auto lp_norm(ForwardIterator first, ForwardIterator last, unsigned p); + + template + auto lp_distance(Container const & v1, Container const & v2, unsigned p); + + template + auto lp_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2, unsigned p); + + template + auto total_variation(Container const & c); + + template + auto total_variation(ForwardIterator first, ForwardIterator last); + +}}} +`` + +[heading Description] + +The file `boost/math/tools/norms.hpp` is a set of facilities for computing scalar values traditionally useful in numerical analysis from vectors. + +Our examples use `std::vector` to hold the data, but this not required. +In general, you can store your data in an Eigen array, an Armadillo vector, `std::array`, and for many of the routines, a `std::forward_list`. +These routines are usable in float, double, long double, and Boost.Multiprecision precision, as well as their complex extensions whenever the computation is well-defined. +Integral datatypes are supported for most routines. + +[heading \u2113[super \u221E] norm] + +Computes the supremum norm of a dataset: + + std::vector v{-3, 2, 1}; + double sup = boost::math::tools::sup_norm(v.cbegin(), v.cend()); + // sup = 3 + + std::vector> v{{0, -8}, {1,1}, {-3,2}}; + // Range call: + double sup = boost::math::tools::sup_norm(v); + // sup = 8 + +Supports real, integral, and complex arithmetic. +Container must be forward iterable and is not modified. + +[heading \u2113[super \u221E] distance] + +Computes the supremum norm distance between two vectors: + + std::vector v{-3, 2, 1}; + std::vector w{6, -2, 1}; + double sup = boost::math::tools::sup_distance(w, v); + // sup = 9 + +Supports real, integral, and complex arithmetic. 
+Container must be forward iterable and is not modified. +If the input it integral, the output is a double precision float. + + +[heading \u2113[super /p/] norm] + + std::vector v{-8, 0, 0}; + double sup = boost::math::tools::lp_norm(v.cbegin(), v.cend(), 7); + // sup = 8 + + std::vector> v{{1, 0}, {0,1}, {0,-1}}; + double sup = boost::math::tools::lp_norm(v.cbegin(), v.cend(), 3); + // sup = cbrt(3) + +Supports both real, integral, and complex arithmetic. +If the input is integral, the output is a double precision float. +The container must be forward iterable and the contents are not modified. + +Only supports integral /p/ for two reasons: The computation is much slower for real /p/, and the non-integral \u2113[super /p/] norm is rarely used. + +[heading \u2113[super /p/] distance] + + std::vector v{-8, 0, 0}; + std::vector w{8, 0, 0}; + double dist = boost::math::tools::lp_distance(v, w, 7); + // dist = 16 + + std::vector> v{{1, 0}, {0,1}, {0,-1}}; + double dist = boost::math::tools::lp_distance(v, v, 3); + // dist = 0 + +Supports both real, integral, and complex arithmetic. +If the input is integral, the output is a double precision float. +The container must be forward iterable and the contents are not modified. + +Only supports integer /p/. + +[heading \u2113[super 0] pseudo-norm] + +Counts the number of non-zero elements in a container. + + std::vector v{0,0,1}; + size_t count = boost::math::tools::l0_pseudo_norm(v.begin(), v.end()); + // count = 1 + +Supports real, integral, and complex numbers. +The container must be forward iterable and the contents are not modified. +Note that this measure is not robust against numerical noise and is therefore not as useful as (say) the Hoyer sparsity in numerical applications. +Works with real, complex, and integral inputs. 
+ +[heading Hamming Distance] + +Compute the number of non-equal elements between two vectors /w/ and /v/: + + std::vector v{0,0,1}; + std::vector w{1,0,0}; + size_t count = boost::math::tools::hamming_distance(w, v); + // count = 2 + +Works for any datatype for which the operator `!=` is defined. + +[heading \u2113[super 1] norm] + +The \u2113[super 1] norm is a special case of the \u2113[super /p/] norm, but is much faster: + + std::vector v{1,1,1}; + double l1 = boost::math::tools::l1_norm(v.begin(), v.end()); + // l1 = 3 + +Requires a forward iterable input, does not modify input data, and works with real, integral, and complex numbers. + +[heading \u2113[super 1] distance] + +Computes the \u2113[super 1] distance between two vectors: + + std::vector v{1,1,1}; + std::vector w{1,1,1}; + double dist = boost::math::tools::l1_distance(w, v); + // dist = 0 + +Requires a forward iterable inputs, does not modify input data, and works with real, integral, and complex numbers. +If the input type is integral, the output is a double precision float. + + +[heading \u2113[super 2] norm] + +The \u2113[super 2] norm is again a special case of the \u2113[super /p/] norm, but is much faster: + + std::vector v{1,1,1}; + double l2 = boost::math::tools::l2_norm(v.begin(), v.end()); + // l2 = sqrt(3) + +Requires a forward iterable input, does not modify input data, and works with real, complex and integral data. +If the input is integral, the output is a double precision float. + + +[heading \u2113[super 2] distance] + +Compute the \u2113[super 2] distance between two vectors /w/ and /v/: + + std::vector v{1,1,1}; + std::vector w{1,2,1}; + double dist = boost::math::tools::l2_distance(w, v); + // dist = 1 + +Requires a forward iterable input, does not modify input data, and works with real, complex numbers, and integral data. +If the input type is integral, the output is a double precision float. 
+ + +[heading Total Variation] + + std::vector v{1,1,1}; + double tv = boost::math::tools::total_variation(v.begin(), v.end()); + // no variation in v, so tv = 0. + v = {0,1}; + double tv = boost::math::tools::total_variation(v.begin(), v.end()); + // variation is 1, so tv = 1. + std::vector v{1,1,1}; + double tv = boost::math::tools::total_variation(v); + +The total variation only supports real numbers and integers. +If the input is integral, the output is a double precision float. + +All the constituent operations to compute the total variation are well-defined for complex numbers, +but the computed result is not meaningful; a 2D total variation is more appropriate. +The container must be forward iterable, and the contents are not modified. + +As an aside, the total variation is not technically a norm, since /TV(v) = 0/ does not imply /v = 0/. +However, it satisfies the triangle inequality and is absolutely 1-homogeneous, so it is a seminorm, and hence is grouped with the other norms here. + +[heading References] + +* Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002. +* Mallat, Stephane. ['A wavelet tour of signal processing: the sparse way.] Academic press, 2008. +* Hurley, Niall, and Scott Rickard. ['Comparing measures of sparsity.] IEEE Transactions on Information Theory 55.10 (2009): 4723-4741. + +[endsect] +[/section:norms Norms] diff --git a/doc/vector_functionals/signal_statistics.qbk b/doc/vector_functionals/signal_statistics.qbk new file mode 100644 index 0000000000..5ef0e1db2e --- /dev/null +++ b/doc/vector_functionals/signal_statistics.qbk @@ -0,0 +1,208 @@ +[/ + Copyright 2018 Nick Thompson + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt). 
+] + +[section:signal_statistics Signal Statistics] + +[heading Synopsis] + +`` +#include + +namespace boost::math::tools { + + template + auto absolute_gini_coefficient(Container & c); + + template + auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last); + + template + auto sample_absolute_gini_coefficient(Container & c); + + template + auto sample_absolute_gini_coefficient(ForwardIterator first, ForwardIterator last); + + template + auto hoyer_sparsity(Container const & c); + + template + auto hoyer_sparsity(ForwardIterator first, ForwardIterator last); + + template + auto oracle_snr(Container const & signal, Container const & noisy_signal); + + template + auto oracle_snr_db(Container const & signal, Container const & noisy_signal); + + template + auto m2m4_snr_estimator(ForwardIterator first, ForwardIterator last, decltype(*first) estimated_signal_kurtosis=1, decltype(*first) estimated_noise_kurtosis=3); + + template + auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimate_noise_kurtosis=3); + + template + auto m2m4_snr_estimator_db(ForwardIterator first, ForwardIterator last, decltype(*first) estimated_signal_kurtosis=1, decltype(*first) estimated_noise_kurtosis=3); + + template + auto m2m4_snr_estimator_db(Container const & noisy_signal,typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimate_noise_kurtosis=3); + +} +`` + +[heading Description] + +The file `boost/math/tools/signal_statistics.hpp` is a set of facilities for computing quantities commonly used in signal analysis. + +Our examples use `std::vector` to hold the data, but this not required. +In general, you can store your data in an Eigen array, and Armadillo vector, `std::array`, and for many of the routines, a `std::forward_list`. 
+These routines are usable in float, double, long double, and Boost.Multiprecision precision, as well as their complex extensions whenever the computation is well-defined. + + +[heading Absolute Gini Coefficient] + +The Gini coefficient, first used to measure wealth inequality, is also one of the best measures of the sparsity of an expansion in a basis. +A sparse expansion has most of its norm concentrated in just a few coefficients, making the connection with wealth inequality obvious. +See [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard] for details. +However, for measuring sparsity, the phase of the numbers is irrelevant, so we provide the `absolute_gini_coefficient`: + + using boost::math::tools::sample_absolute_gini_coefficient; + using boost::math::tools::absolute_gini_coefficient; + std::vector> v{{0,1}, {0,0}, {0,0}, {0,0}}; + double abs_gini = sample_absolute_gini_coefficient(v); + // now abs_gini = 1; maximally unequal + + std::vector> w{{0,1}, {1,0}, {0,-1}, {-1,0}}; + abs_gini = absolute_gini_coefficient(w); + // now abs_gini = 0; every element of the vector has equal magnitude + + std::vector u{-1, 1, -1}; + abs_gini = absolute_gini_coefficient(u); + // now abs_gini = 0 + // Alternative call useful for computing over subset of the input: + abs_gini = absolute_gini_coefficient(u.begin(), u.begin() + 1); + + +The sample Gini coefficient returns unity for a vector which has only one nonzero coefficient. +The population Gini coefficient of a vector with one non-zero element is dependent on the length of the input. + +The sample Gini coefficient lacks one desirable property of the population Gini coefficient, +namely that "cloning" a vector has the same Gini coefficient; though cloning holds to very high accuracy with the sample Gini coefficient and can easily be recovered by a rescaling. + +If sorting the input data is too much expense for a sparsity measure (is it going to be perfect anyway?), +consider calculating the Hoyer sparsity instead. 
+ +[heading Hoyer Sparsity] + +The Hoyer sparsity measures a normalized ratio of the \u2113[super 1] and \u2113[super 2] norms. +As the name suggests, it is used to measure the sparsity of an expansion in some basis. + +The Hoyer sparsity computes ([radic]/N/ - \u2113[super 1](v)/\u2113[super 2](v))/([radic]N -1). +For details, see [@http://www.jmlr.org/papers/volume5/hoyer04a/hoyer04a.pdf Hoyer] as well as [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard]. + +A few special cases will serve to clarify the intended use: +If /v/ has only one nonzero coefficient, the Hoyer sparsity attains its maxima of 1. +If the coefficients of /v/ all have the same magnitude, then the Hoyer sparsity attains its minima of zero. +If the elements of /v/ are uniformly distributed on an interval [0, /b/], then the Hoyer sparsity is approximately 0.133. + + +Usage: + + std::vector v{1,0,0}; + Real hs = boost::math::tools::hoyer_sparsity(v); + // hs = 1 + std::vector v{1,-1,1}; + Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); + // hs = 0 + +The container must be forward iterable and the contents are not modified. +Accepts real, complex, and integer inputs. +If the input is an integral type, the output is a double precision float. + + +[heading Oracle Signal-to-noise ratio] + +The function `oracle_snr` computes the ratio \u2016 /s/ \u2016[sub 2][super 2] / \u2016 /s/ - /x/ \u2016[sub 2][super 2], where /s/ is signal and /x/ is a noisy signal. +The function `oracle_snr_db` computes 10`log`[sub 10](\u2016 /s/ \u2016[super 2] / \u2016 /s/ - /x/ \u2016[super 2]). +The functions are so named because in general, one does not know how to decompose a real signal /x/ into /s/ + /w/ and as such /s/ is regarded as oracle information. +Hence this function is mainly useful for unit testing other SNR estimators. 
+ +Usage: + + std::vector signal(500, 3.2); + std::vector noisy_signal(500); + // fill 'noisy_signal' signal + noise + double snr_db = boost::math::tools::oracle_snr_db(signal, noisy_signal); + double snr = boost::math::tools::oracle_snr(signal, noisy_signal); + +The input can be real, complex, or integral. +Integral inputs produce double precision floating point outputs. +The input data is not modified and must satisfy the requirements of a `RandomAccessContainer`. + +[heading /M/[sub 2]/M/[sub 4] SNR Estimation] + +Estimates the SNR of a noisy signal via the /M/[sub 2]/M/[sub 4] method. +See [@https://doi.org/10.1109/26.871393 Pauluzzi and N.C. Beaulieu] and [@https://doi.org/10.1109/ISIT.1994.394869 Matzner and Englberger] for details. + + std::vector noisy_signal(512); + // fill noisy_signal with data contaminated by Gaussian white noise: + double est_snr_db = boost::math::tools::m2m4_snr_estimator_db(noisy_signal); + +The /M/[sub 2]/M/[sub 4] SNR estimator is an "in-service" estimator, meaning that the estimate is made using the noisy, data-bearing signal, and does not require a background estimate. +This estimator has been found to be work best between roughly -3 and 15db, tending to overestimate the noise below -3db, and underestimate the noise above 15db. +See [@https://www.mdpi.com/2078-2489/8/3/75/pdf Xue et al] for details. + +The /M/[sub 2]/M/[sub 4] SNR estimator, by default, assumes that the kurtosis of the signal is 1 and the kurtosis of the noise is 3, the latter corresponding to Gaussian noise. 
+These parameters, however, can be overridden: + + std::vector noisy_signal(512); + // fill noisy_signal with the data: + double signal_kurtosis = 1.5; + // Noise is assumed to follow Laplace distribution, which has kurtosis of 6: + double noise_kurtosis = 6; + double est_snr = boost::math::tools::m2m4_snr_estimator_db(noisy_signal, signal_kurtosis, noise_kurtosis); + +Now, technically the method is a "blind SNR estimator", meaning that the no /a-priori/ information about the signal is required to use the method. +However, the performance of the method is /vastly/ better if you can come up with a better estimate of the signal and noise kurtosis. +How can we do this? Suppose we know that the SNR is much greater than 1. +Then we can estimate the signal kurtosis simply by using the noisy signal kurtosis. +If the SNR is much less than one, this method breaks down as the noisy signal kurtosis will tend to the noise kurtosis-though in this limit we have an excellent estimator of the noise kurtosis! +In addition, if you have a model of what your signal should look like, you can precompute the signal kurtosis. +For example, sinusoids have a kurtosis of 1.5. +See [@http://www.jcomputers.us/vol8/jcp0808-21.pdf here] for a study which uses estimates of this sort to improve the performance of the /M/[sub 2]/M/[sub 4] estimator. + + +/Nota bene/: The traditional definition of SNR is /not/ mean invariant. +By this we mean that if a constant is added to every sample of a signal, the SNR is changed. +For example, adding DC bias to a signal changes its SNR. +For most use cases, this is really not what you intend; for example a signal consisting of zeros plus Gaussian noise has an SNR of zero, +whereas a signal with a constant DC bias and random Gaussian noise might have a very large SNR. + +The /M/[sub 2]/M/[sub 4] SNR estimator is computed from mean-invariant quantities, +and hence it should really be compared to the mean-invariant SNR. 
+ +/Nota bene/: This computation requires the solution of a system of quadratic equations involving the noise kurtosis, the signal kurtosis, and the second and fourth moments of the data. +There is no guarantee that a solution of this system exists for all value of these parameters, in fact nonexistence can easily be demonstrated for certain data. +If there is no solution to the system, then failure is communicated by returning NaNs. +This happens distressingly often; if a user is aware of any blind SNR estimators which do not suffer from this drawback, please open a github ticket and let us know. + +The author has not managed to fully characterize the conditions under which a real solution with /S > 0/ and /N >0/ exists. +However, a very intuitive example demonstrates why nonexistence can occur. +Suppose the signal and noise kurtosis are equal. +Then the method has no way to distinguish between the signal and the noise, and the solution is non-unique. + + +[heading References] + +* Mallat, Stephane. ['A wavelet tour of signal processing: the sparse way.] Academic press, 2008. +* Hurley, Niall, and Scott Rickard. ['Comparing measures of sparsity.] IEEE Transactions on Information Theory 55.10 (2009): 4723-4741. +* Jensen, Arne, and Anders la Cour-Harbo. ['Ripples in mathematics: the discrete wavelet transform.] Springer Science & Business Media, 2001. +* D. R. Pauluzzi and N. C. Beaulieu, ['A comparison of SNR estimation techniques for the AWGN channel,] IEEE Trans. Communications, Vol. 48, No. 10, pp. 1681-1691, 2000. +* Hoyer, Patrik O. ['Non-negative matrix factorization with sparseness constraints.], Journal of machine learning research 5.Nov (2004): 1457-1469. 
+ +[endsect] +[/section:signal_statistics Signal Statistics] diff --git a/doc/vector_functionals/univariate_statistics.qbk b/doc/vector_functionals/univariate_statistics.qbk new file mode 100644 index 0000000000..e07430530a --- /dev/null +++ b/doc/vector_functionals/univariate_statistics.qbk @@ -0,0 +1,239 @@ +[/ + Copyright 2018 Nick Thompson + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt). +] + +[section:univariate_statistics Univariate Statistics] + +[heading Synopsis] + +`` +#include + +namespace boost{ namespace math{ namespace tools { + + template + auto mean(Container const & c); + + template + auto mean(ForwardIterator first, ForwardIterator last); + + template + auto variance(Container const & c); + + template + auto variance(ForwardIterator first, ForwardIterator last); + + template + auto sample_variance(Container const & c); + + template + auto sample_variance(ForwardIterator first, ForwardIterator last); + + template + auto skewness(Container const & c); + + template + auto skewness(ForwardIterator first, ForwardIterator last); + + template + auto kurtosis(Container const & c); + + template + auto kurtosis(ForwardIterator first, ForwardIterator last); + + template + auto excess_kurtosis(Container const & c); + + template + auto excess_kurtosis(ForwardIterator first, ForwardIterator last); + + template + auto first_four_moments(Container const & c); + + template + auto first_four_moments(ForwardIterator first, ForwardIterator last); + + template + auto median(Container & c); + + template + auto median(ForwardIterator first, ForwardIterator last); + + template + auto median_absolute_deviation(ForwardIterator first, ForwardIterator last, typename std::iterator_traits::value_type center=std::numeric_limits::quiet_NaN()); + + template + auto median_absolute_deviation(RandomAccessContainer v, typename RandomAccessContainer::value_type 
center=std::numeric_limits::quiet_NaN()); + + template + auto gini_coefficient(Container & c); + + template + auto gini_coefficient(ForwardIterator first, ForwardIterator last); + + template + auto sample_gini_coefficient(Container & c); + + template + auto sample_gini_coefficient(ForwardIterator first, ForwardIterator last); + +}}} +`` + +[heading Description] + +The file `boost/math/tools/univariate_statistics.hpp` is a set of facilities for computing scalar values from vectors. + +Many of these functionals have trivial naive implementations, but experienced programmers will recognize that even trivial algorithms are easy to screw up, and that numerical instabilities often lurk in corner cases. +We have attempted to do our "due diligence" to root out these problems-scouring the literature for numerically stable algorithms for even the simplest of functionals. + +/Nota bene/: Some similar functionality is provided in [@https://www.boost.org/doc/libs/1_68_0/doc/html/accumulators/user_s_guide.html Boost Accumulators Framework]. +These accumulators should be used in real-time applications; `univariate_statistics.hpp` should be used when CPU vectorization is needed. +As a reminder, remember that to actually /get/ vectorization, compile with `-march=native -O3` flags. + +We now describe each functional in detail. +Our examples use `std::vector` to hold the data, but this not required. +In general, you can store your data in an Eigen array, and Armadillo vector, `std::array`, and for many of the routines, a `std::forward_list`. +These routines are usable in float, double, long double, and Boost.Multiprecision precision, as well as their complex extensions whenever the computation is well-defined. +For certain operations (total variation, for example) integer inputs are supported. 
+ +[heading Mean] + + std::vector v{1,2,3,4,5}; + double mu = boost::math::tools::mean(v.cbegin(), v.cend()); + // Alternative syntax if you want to use entire container: + mu = boost::math::tools::mean(v); + +The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6a]. +The data is not modified and must be forward iterable. +Works with real and integer data. +If the input is an integer type, the output is a double precision float. + +[heading Variance] + + std::vector v{1,2,3,4,5}; + Real sigma_sq = boost::math::tools::variance(v.cbegin(), v.cend()); + +If you don't need to calculate on a subset of the input, then the range call is more terse: + + std::vector v{1,2,3,4,5}; + Real sigma_sq = boost::math::tools::variance(v); + +The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6b]. +The input data must be forward iterable and the range `[first, last)` must contain at least two elements. +It is /not/ in general sensible to pass complex numbers to this routine. +If integers are passed as input, then the output is a double precision float. + +`boost::math::tools::variance` returns the population variance. +If you want a sample variance, use + + std::vector v{1,2,3,4,5}; + Real sn_sq = boost::math::tools::sample_variance(v); + + +[heading Skewness] + +Computes the skewness of a dataset: + + std::vector v{1,2,3,4,5}; + double skewness = boost::math::tools::skewness(v); + // skewness = 0. + +The input vector is not modified, works with integral and real data. +If the input data is integral, the output is a double precision float. + +For a dataset consisting of a single constant value, we take the skewness to be zero by definition. + +The implementation follows [@https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf Pebay]. 
+ +[heading Kurtosis] + +Computes the kurtosis of a dataset: + + std::vector v{1,2,3,4,5}; + double kurtosis = boost::math::tools::kurtosis(v); + // kurtosis = 17/10 + +The implementation follows [@https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf Pebay]. +The input data must be forward iterable and must consist of real or integral values. +If the input data is integral, the output is a double precision float. +Note that this is /not/ the excess kurtosis. +If you require the excess kurtosis, use `boost::math::tools::excess_kurtosis`. +This function simply subtracts 3 from the kurtosis, but it makes eminently clear our definition of kurtosis. + +[heading First four moments] + +Simultaneously computes the first four [@https://en.wikipedia.org/wiki/Central_moment central moments] in a single pass through the data: + + std::vector v{1,2,3,4,5}; + auto [M1, M2, M3, M4] = boost::math::tools::first_four_moments(v); + + +[heading Median] + +Computes the median of a dataset: + + std::vector v{1,2,3,4,5}; + double m = boost::math::tools::median(v.begin(), v.end()); + +/Nota bene: The input vector is modified./ +The calculation of the median is a thin wrapper around the C++11 [@https://en.cppreference.com/w/cpp/algorithm/nth_element `nth_element`]. +Therefore, all requirements of `std::nth_element` are inherited by the median calculation. +In particular, the container must allow random access. + +[heading Median Absolute Deviation] + +Computes the [@https://en.wikipedia.org/wiki/Median_absolute_deviation median absolute deviation] of a dataset: + + std::vector v{1,2,3,4,5}; + double mad = boost::math::tools::median_absolute_deviation(v); + +By default, the deviation from the median is used. 
+If you have some prior that the median is zero, or wish to compute the median absolute deviation from the mean, +use the following: + + // prior is that center is zero: + double center = 0; + double mad = boost::math::tools::median_absolute_deviation(v, center); + + // compute median absolute deviation from the mean: + double mu = boost::math::tools::mean(v); + double mad = boost::math::tools::median_absolute_deviation(v, mu); + +/Nota bene:/ The input vector is modified. +Again the vector is passed into a call to [@https://en.cppreference.com/w/cpp/algorithm/nth_element `nth_element`]. + +[heading Gini Coefficient] + +Compute the Gini coefficient of a dataset: + + std::vector v{1,0,0,0}; + double gini = boost::math::tools::gini_coefficient(v); + // gini = 3/4 + double s_gini = boost::math::tools::sample_gini_coefficient(v); + // s_gini = 1. + std::vector w{1,1,1,1}; + gini = boost::math::tools::gini_coefficient(w.begin(), w.end()); + // gini = 0, as all elements are now equal. + +/Nota bene/: The input data is altered: in particular, it is sorted. Makes a call to `std::sort`, and as such requires random access iterators. + +The sample Gini coefficient lies in the range [0,1], whereas the population Gini coefficient is in the range [0, 1 - 1/ /n/]. + +/Nota bene:/ There is essentially no reason to pass negative values to the Gini coefficient function. +However, a use case (measuring wealth inequality when some people have negative wealth) exists, so we do not throw an exception when negative values are encountered. +You should have /very/ good cause to pass negative values to the Gini coefficient calculator. +Another use case is found in signal processing, but the sorting is by magnitude and hence has a different implementation. +See `absolute_gini_coefficient` for details. + +[heading References] + +* Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002. +* Philippe P. 
Pébay: ["Formulas for Robust, One-Pass Parallel Computation of Covariances and Arbitrary-Order Statistical Moments.] Technical Report SAND2008-6212, Sandia National Laboratories, September 2008. + +[endsect] +[/section:univariate_statistics Univariate Statistics] diff --git a/include/boost/math/tools/bivariate_statistics.hpp b/include/boost/math/tools/bivariate_statistics.hpp new file mode 100644 index 0000000000..20b7500ed3 --- /dev/null +++ b/include/boost/math/tools/bivariate_statistics.hpp @@ -0,0 +1,97 @@ +// (C) Copyright Nick Thompson 2018. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP +#define BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP + +#include +#include +#include +#include + + +namespace boost{ namespace math{ namespace tools { + +template +auto means_and_covariance(Container const & u, Container const & v) +{ + using Real = typename Container::value_type; + using std::size; + BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance."); + BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least one sample."); + + // See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al. 
+ Real cov = 0; + Real mu_u = u[0]; + Real mu_v = v[0]; + + for(size_t i = 1; i < size(u); ++i) + { + Real u_tmp = (u[i] - mu_u)/(i+1); + Real v_tmp = v[i] - mu_v; + cov += i*u_tmp*v_tmp; + mu_u = mu_u + u_tmp; + mu_v = mu_v + v_tmp/(i+1); + } + + return std::make_tuple(mu_u, mu_v, cov/size(u)); +} + +template +auto covariance(Container const & u, Container const & v) +{ + auto [mu_u, mu_v, cov] = boost::math::tools::means_and_covariance(u, v); + return cov; +} + +template +auto correlation_coefficient(Container const & u, Container const & v) +{ + using Real = typename Container::value_type; + using std::size; + BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance."); + BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least two samples."); + + Real cov = 0; + Real mu_u = u[0]; + Real mu_v = v[0]; + Real Qu = 0; + Real Qv = 0; + + for(size_t i = 1; i < size(u); ++i) + { + Real u_tmp = u[i] - mu_u; + Real v_tmp = v[i] - mu_v; + Qu = Qu + (i*u_tmp*u_tmp)/(i+1); + Qv = Qv + (i*v_tmp*v_tmp)/(i+1); + cov += i*u_tmp*v_tmp/(i+1); + mu_u = mu_u + u_tmp/(i+1); + mu_v = mu_v + v_tmp/(i+1); + } + + // If both datasets are constant, then they are perfectly correlated. + if (Qu == 0 && Qv == 0) + { + return Real(1); + } + // If one dataset is constant and the other isn't, then they have no correlation: + if (Qu == 0 || Qv == 0) + { + return Real(0); + } + + // Make sure rho in [-1, 1], even in the presence of numerical noise. + Real rho = cov/sqrt(Qu*Qv); + if (rho > 1) { + rho = 1; + } + if (rho < -1) { + rho = -1; + } + return rho; +} + +}}} +#endif diff --git a/include/boost/math/tools/norms.hpp b/include/boost/math/tools/norms.hpp new file mode 100644 index 0000000000..478fe04db2 --- /dev/null +++ b/include/boost/math/tools/norms.hpp @@ -0,0 +1,640 @@ +// (C) Copyright Nick Thompson 2018. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_NORMS_HPP +#define BOOST_MATH_TOOLS_NORMS_HPP +#include +#include +#include +#include +#include + + +namespace boost::math::tools { + +// Mallat, "A Wavelet Tour of Signal Processing", equation 2.60: +template +auto total_variation(ForwardIterator first, ForwardIterator last) +{ + using T = typename std::iterator_traits::value_type; + using std::abs; + BOOST_ASSERT_MSG(first != last && std::next(first) != last, "At least two samples are required to compute the total variation."); + auto it = first; + if constexpr (std::is_unsigned::value) + { + T tmp = *it; + double tv = 0; + while (++it != last) + { + if (*it > tmp) + { + tv += *it - tmp; + } + else + { + tv += tmp - *it; + } + tmp = *it; + } + return tv; + } + else if constexpr (std::is_integral::value) + { + double tv = 0; + double tmp = *it; + while(++it != last) + { + double tmp2 = *it; + tv += abs(tmp2 - tmp); + tmp = *it; + } + return tv; + } + else + { + T tmp = *it; + T tv = 0; + while (++it != last) + { + tv += abs(*it - tmp); + tmp = *it; + } + return tv; + } +} + +template +inline auto total_variation(Container const & v) +{ + return total_variation(v.cbegin(), v.cend()); +} + + +template +auto sup_norm(ForwardIterator first, ForwardIterator last) +{ + BOOST_ASSERT_MSG(first != last, "At least one value is required to compute the sup norm."); + using T = typename std::iterator_traits::value_type; + using std::abs; + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + auto it = std::max_element(first, last, [](T a, T b) { return abs(b) > abs(a); }); + return abs(*it); + } + else if constexpr (std::is_unsigned::value) + { + return *std::max_element(first, last); + } + else + { + auto pair = std::minmax_element(first, last); + if (abs(*pair.first) > abs(*pair.second)) + { + return abs(*pair.first); 
+ } + else + { + return abs(*pair.second); + } + } +} + +template +inline auto sup_norm(Container const & v) +{ + return sup_norm(v.cbegin(), v.cend()); +} + +template +auto l1_norm(ForwardIterator first, ForwardIterator last) +{ + using T = typename std::iterator_traits::value_type; + using std::abs; + if constexpr (std::is_unsigned::value) + { + double l1 = 0; + for (auto it = first; it != last; ++it) + { + l1 += *it; + } + return l1; + } + else if constexpr (std::is_integral::value) + { + double l1 = 0; + for (auto it = first; it != last; ++it) + { + double tmp = *it; + l1 += abs(tmp); + } + return l1; + } + else + { + decltype(abs(*first)) l1 = 0; + for (auto it = first; it != last; ++it) + { + l1 += abs(*it); + } + return l1; + } + +} + +template +inline auto l1_norm(Container const & v) +{ + return l1_norm(v.cbegin(), v.cend()); +} + + +template +auto l2_norm(ForwardIterator first, ForwardIterator last) +{ + using T = typename std::iterator_traits::value_type; + using std::abs; + using std::norm; + using std::sqrt; + using std::is_floating_point; + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + typedef typename T::value_type Real; + Real l2 = 0; + for (auto it = first; it != last; ++it) + { + l2 += norm(*it); + } + Real result = sqrt(l2); + if (!isfinite(result)) + { + Real a = sup_norm(first, last); + l2 = 0; + for (auto it = first; it != last; ++it) + { + l2 += norm(*it/a); + } + return a*sqrt(l2); + } + return result; + } + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + T l2 = 0; + for (auto it = first; it != last; ++it) + { + l2 += (*it)*(*it); + } + T result = sqrt(l2); + // Higham, Accuracy and Stability of Numerical Algorithms, + // Problem 27.5 presents a different algorithm to deal with overflow. 
+ // The algorithm used here takes 3 passes *if* there is overflow. + // Higham's algorithm is 1 pass, but more requires operations than the no oveflow case. + // I'm operating under the assumption that overflow is rare since the dynamic range of floating point numbers is huge. + if (!isfinite(result)) + { + T a = sup_norm(first, last); + l2 = 0; + for (auto it = first; it != last; ++it) + { + T tmp = *it/a; + l2 += tmp*tmp; + } + return a*sqrt(l2); + } + return result; + } + else + { + double l2 = 0; + for (auto it = first; it != last; ++it) + { + double tmp = *it; + l2 += tmp*tmp; + } + return sqrt(l2); + } +} + +template +inline auto l2_norm(Container const & v) +{ + return l2_norm(v.cbegin(), v.cend()); +} + +template +size_t l0_pseudo_norm(ForwardIterator first, ForwardIterator last) +{ + using RealOrComplex = typename std::iterator_traits::value_type; + size_t count = 0; + for (auto it = first; it != last; ++it) + { + if (*it != RealOrComplex(0)) + { + ++count; + } + } + return count; +} + +template +inline size_t l0_pseudo_norm(Container const & v) +{ + return l0_pseudo_norm(v.cbegin(), v.cend()); +} + +template +size_t hamming_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2) +{ + size_t count = 0; + auto it1 = first1; + auto it2 = first2; + while (it1 != last1) + { + if (*it1++ != *it2++) + { + ++count; + } + } + return count; +} + +template +inline size_t hamming_distance(Container const & v, Container const & w) +{ + return hamming_distance(v.cbegin(), v.cend(), w.cbegin()); +} + +template +auto lp_norm(ForwardIterator first, ForwardIterator last, unsigned p) +{ + using std::abs; + using std::pow; + using std::is_floating_point; + using std::isfinite; + using RealOrComplex = typename std::iterator_traits::value_type; + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + using std::norm; + using Real = typename 
RealOrComplex::value_type; + Real lp = 0; + for (auto it = first; it != last; ++it) + { + lp += pow(abs(*it), p); + } + + auto result = pow(lp, Real(1)/Real(p)); + if (!isfinite(result)) + { + auto a = boost::math::tools::sup_norm(first, last); + Real lp = 0; + for (auto it = first; it != last; ++it) + { + lp += pow(abs(*it)/a, p); + } + result = a*pow(lp, Real(1)/Real(p)); + } + return result; + } + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + BOOST_ASSERT_MSG(p >= 0, "For p < 0, the lp norm is not a norm"); + RealOrComplex lp = 0; + + for (auto it = first; it != last; ++it) + { + lp += pow(abs(*it), p); + } + + RealOrComplex result = pow(lp, RealOrComplex(1)/RealOrComplex(p)); + if (!isfinite(result)) + { + RealOrComplex a = boost::math::tools::sup_norm(first, last); + lp = 0; + for (auto it = first; it != last; ++it) + { + lp += pow(abs(*it)/a, p); + } + result = a*pow(lp, RealOrComplex(1)/RealOrComplex(p)); + } + return result; + } + else + { + double lp = 0; + + for (auto it = first; it != last; ++it) + { + double tmp = *it; + lp += pow(abs(tmp), p); + } + double result = pow(lp, 1.0/double(p)); + if (!isfinite(result)) + { + double a = boost::math::tools::sup_norm(first, last); + lp = 0; + for (auto it = first; it != last; ++it) + { + double tmp = *it; + lp += pow(abs(tmp)/a, p); + } + result = a*pow(lp, double(1)/double(p)); + } + return result; + } +} + +template +inline auto lp_norm(Container const & v, unsigned p) +{ + return lp_norm(v.cbegin(), v.cend(), p); +} + + +template +auto lp_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2, unsigned p) +{ + using std::pow; + using std::abs; + using std::is_floating_point; + using std::isfinite; + using RealOrComplex = typename std::iterator_traits::value_type; + auto it1 = first1; + auto it2 = first2; + + if constexpr (boost::is_complex::value || + 
boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + using Real = typename RealOrComplex::value_type; + using std::norm; + Real dist = 0; + while(it1 != last1) + { + auto tmp = *it1++ - *it2++; + dist += pow(abs(tmp), p); + } + return pow(dist, Real(1)/Real(p)); + } + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + RealOrComplex dist = 0; + while(it1 != last1) + { + auto tmp = *it1++ - *it2++; + dist += pow(abs(tmp), p); + } + return pow(dist, RealOrComplex(1)/RealOrComplex(p)); + } + else + { + double dist = 0; + while(it1 != last1) + { + double tmp1 = *it1++; + double tmp2 = *it2++; + // Naively you'd expect the integer subtraction to be faster, + // but this can overflow or wraparound: + //double tmp = *it1++ - *it2++; + dist += pow(abs(tmp1 - tmp2), p); + } + return pow(dist, 1.0/double(p)); + } +} + +template +inline auto lp_distance(Container const & v, Container const & w, unsigned p) +{ + return lp_distance(v.cbegin(), v.cend(), w.cbegin(), p); +} + + +template +auto l1_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2) +{ + using std::abs; + using std::is_floating_point; + using std::isfinite; + using T = typename std::iterator_traits::value_type; + auto it1 = first1; + auto it2 = first2; + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + using Real = typename T::value_type; + Real sum = 0; + while (it1 != last1) { + sum += abs(*it1++ - *it2++); + } + return sum; + } + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + T sum = 0; + while (it1 != last1) + { + sum += abs(*it1++ - *it2++); + } + return sum; + } + else if constexpr (std::is_unsigned::value) + { + double sum = 0; + 
while(it1 != last1) + { + T x1 = *it1++; + T x2 = *it2++; + if (x1 > x2) + { + sum += (x1 - x2); + } + else + { + sum += (x2 - x1); + } + } + return sum; + } + else if constexpr (std::is_integral::value) + { + double sum = 0; + while(it1 != last1) + { + double x1 = *it1++; + double x2 = *it2++; + sum += abs(x1-x2); + } + return sum; + } + else + { + BOOST_ASSERT_MSG(false, "Could not recognize type."); + } + +} + +template +auto l1_distance(Container const & v, Container const & w) +{ + using std::size; + BOOST_ASSERT_MSG(size(v) == size(w), + "L1 distance requires both containers to have the same number of elements"); + return l1_distance(v.cbegin(), v.cend(), w.begin()); +} + +template +auto l2_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2) +{ + using std::abs; + using std::norm; + using std::sqrt; + using std::is_floating_point; + using std::isfinite; + using T = typename std::iterator_traits::value_type; + auto it1 = first1; + auto it2 = first2; + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + using Real = typename T::value_type; + Real sum = 0; + while (it1 != last1) { + sum += norm(*it1++ - *it2++); + } + return sqrt(sum); + } + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + T sum = 0; + while (it1 != last1) + { + T tmp = *it1++ - *it2++; + sum += tmp*tmp; + } + return sqrt(sum); + } + else if constexpr (std::is_unsigned::value) + { + double sum = 0; + while(it1 != last1) + { + T x1 = *it1++; + T x2 = *it2++; + if (x1 > x2) + { + double tmp = x1-x2; + sum += tmp*tmp; + } + else + { + double tmp = x2 - x1; + sum += tmp*tmp; + } + } + return sqrt(sum); + } + else + { + double sum = 0; + while(it1 != last1) + { + double x1 = *it1++; + double x2 = *it2++; + double tmp = x1-x2; + sum += tmp*tmp; + } + return sqrt(sum); + } +} + 
+template +auto l2_distance(Container const & v, Container const & w) +{ + using std::size; + BOOST_ASSERT_MSG(size(v) == size(w), + "L2 distance requires both containers to have the same number of elements"); + return l2_distance(v.cbegin(), v.cend(), w.begin()); +} + +template +auto sup_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2) +{ + using std::abs; + using std::norm; + using std::sqrt; + using std::is_floating_point; + using std::isfinite; + using T = typename std::iterator_traits::value_type; + auto it1 = first1; + auto it2 = first2; + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + using Real = typename T::value_type; + Real sup_sq = 0; + while (it1 != last1) { + Real tmp = norm(*it1++ - *it2++); + if (tmp > sup_sq) { + sup_sq = tmp; + } + } + return sqrt(sup_sq); + } + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + T sup = 0; + while (it1 != last1) + { + T tmp = *it1++ - *it2++; + if (sup < abs(tmp)) + { + sup = abs(tmp); + } + } + return sup; + } + else // integral values: + { + double sup = 0; + while(it1 != last1) + { + T x1 = *it1++; + T x2 = *it2++; + double tmp; + if (x1 > x2) + { + tmp = x1-x2; + } + else + { + tmp = x2 - x1; + } + if (sup < tmp) { + sup = tmp; + } + } + return sup; + } +} + +template +auto sup_distance(Container const & v, Container const & w) +{ + using std::size; + BOOST_ASSERT_MSG(size(v) == size(w), + "sup distance requires both containers to have the same number of elements"); + return sup_distance(v.cbegin(), v.cend(), w.begin()); +} + + +} +#endif diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index 62a24c2a5f..0cdf99bcb4 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -9,6 +9,9 @@ #ifdef _MSC_VER #pragma once 
#endif +#include // test for multiprecision types. +#include // test for complex types + #include #include #include @@ -649,6 +652,174 @@ Complex complex_newton(F g, Complex guess, int max_iterations=std::numeric_limit } #endif + +#if !defined(BOOST_NO_CXX17_IF_CONSTEXPR) +// https://stackoverflow.com/questions/48979861/numerically-stable-method-for-solving-quadratic-equations/50065711 +namespace detail +{ + template + inline T discriminant(T const & a, T const & b, T const & c) + { + T w = 4*a*c; + T e = std::fma(-c, 4*a, w); + T f = std::fma(b, b, -w); + return f + e; + } +} + +template +auto quadratic_roots(T const& a, T const& b, T const& c) +{ + using std::copysign; + using std::sqrt; + if constexpr (std::is_integral::value) + { + // What I want is to write: + // return quadratic_roots(double(a), double(b), double(c)); + // but that doesn't compile. + double nan = std::numeric_limits::quiet_NaN(); + if(a==0) + { + if (b==0 && c != 0) + { + return std::pair(nan, nan); + } + else if (b==0 && c==0) + { + return std::pair(0,0); + } + return std::pair(-c/b, -c/b); + } + if (b==0) + { + double x0_sq = -double(c)/double(a); + if (x0_sq < 0) { + return std::pair(nan, nan); + } + double x0 = sqrt(x0_sq); + return std::pair(-x0,x0); + } + double discriminant = detail::discriminant(double(a), double(b), double(c)); + if (discriminant < 0) + { + return std::pair(nan, nan); + } + double q = -(b + copysign(sqrt(discriminant), double(b)))/T(2); + double x0 = q/a; + double x1 = c/q; + if (x0 < x1) { + return std::pair(x0, x1); + } + return std::pair(x1, x0); + } + else if constexpr (std::is_floating_point::value) + { + T nan = std::numeric_limits::quiet_NaN(); + if(a==0) + { + if (b==0 && c != 0) + { + return std::pair(nan, nan); + } + else if (b==0 && c==0) + { + return std::pair(0,0); + } + return std::pair(-c/b, -c/b); + } + if (b==0) + { + T x0_sq = -c/a; + if (x0_sq < 0) { + return std::pair(nan, nan); + } + T x0 = sqrt(x0_sq); + return std::pair(-x0,x0); + } + T 
discriminant = detail::discriminant(a, b, c); + // Is there a sane way to flush very small negative values to zero? + // If there is I don't know of it. + if (discriminant < 0) + { + return std::pair(nan, nan); + } + T q = -(b + copysign(sqrt(discriminant), b))/T(2); + T x0 = q/a; + T x1 = c/q; + if (x0 < x1) + { + return std::pair(x0, x1); + } + return std::pair(x1, x0); + } + else if constexpr (boost::is_complex::value || boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + typename T::value_type nan = std::numeric_limits::quiet_NaN(); + if(a.real()==0 && a.imag() ==0) + { + using std::norm; + if (b.real()==0 && b.imag() && norm(c) != 0) + { + return std::pair({nan, nan}, {nan, nan}); + } + else if (b.real()==0 && b.imag() && c.real() ==0 && c.imag() == 0) + { + return std::pair({0,0},{0,0}); + } + return std::pair(-c/b, -c/b); + } + if (b.real()==0 && b.imag() == 0) + { + T x0_sq = -c/a; + T x0 = sqrt(x0_sq); + return std::pair(-x0, x0); + } + // There's no fma for complex types: + T discriminant = b*b - T(4)*a*c; + T q = -(b + sqrt(discriminant))/T(2); + return std::pair(q/a, c/q); + } + else // Most likely the type is a boost.multiprecision. + { //There is no fma for multiprecision, and in addition it doesn't seem to be useful, so revert to the naive computation. 
+ T nan = std::numeric_limits::quiet_NaN(); + if(a==0) + { + if (b==0 && c != 0) + { + return std::pair(nan, nan); + } + else if (b==0 && c==0) + { + return std::pair(0,0); + } + return std::pair(-c/b, -c/b); + } + if (b==0) + { + T x0_sq = -c/a; + if (x0_sq < 0) { + return std::pair(nan, nan); + } + T x0 = sqrt(x0_sq); + return std::pair(-x0,x0); + } + T discriminant = b*b - 4*a*c; + if (discriminant < 0) + { + return std::pair(nan, nan); + } + T q = -(b + copysign(sqrt(discriminant), b))/T(2); + T x0 = q/a; + T x1 = c/q; + if (x0 < x1) + { + return std::pair(x0, x1); + } + return std::pair(x1, x0); + } +} +#endif + } // namespace tools } // namespace math } // namespace boost diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp new file mode 100644 index 0000000000..74f9dfd031 --- /dev/null +++ b/include/boost/math/tools/signal_statistics.hpp @@ -0,0 +1,346 @@ +// (C) Copyright Nick Thompson 2018. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_SIGNAL_STATISTICS_HPP +#define BOOST_MATH_TOOLS_SIGNAL_STATISTICS_HPP + +#include +#include +#include +#include +#include +#include +#include + + +namespace boost::math::tools { + +template +auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) +{ + using std::abs; + using RealOrComplex = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); + + std::sort(first, last, [](RealOrComplex a, RealOrComplex b) { return abs(b) > abs(a); }); + + + decltype(abs(*first)) i = 1; + decltype(abs(*first)) num = 0; + decltype(abs(*first)) denom = 0; + for (auto it = first; it != last; ++it) + { + decltype(abs(*first)) tmp = abs(*it); + num += tmp*i; + denom += tmp; + ++i; + } + + // If the l1 norm is zero, all elements are zero, so every element is the same. 
+ if (denom == 0) + { + decltype(abs(*first)) zero = 0; + return zero; + } + return ((2*num)/denom - i)/(i-1); +} + +template +inline auto absolute_gini_coefficient(RandomAccessContainer & v) +{ + return boost::math::tools::absolute_gini_coefficient(v.begin(), v.end()); +} + +template +auto sample_absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) +{ + size_t n = std::distance(first, last); + return n*boost::math::tools::absolute_gini_coefficient(first, last)/(n-1); +} + +template +inline auto sample_absolute_gini_coefficient(RandomAccessContainer & v) +{ + return boost::math::tools::sample_absolute_gini_coefficient(v.begin(), v.end()); +} + + +// The Hoyer sparsity measure is defined in: +// https://arxiv.org/pdf/0811.4706.pdf +template +auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) +{ + using T = typename std::iterator_traits::value_type; + using std::abs; + using std::sqrt; + BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Hoyer sparsity requires at least two samples."); + + if constexpr (std::is_unsigned::value) + { + T l1 = 0; + T l2 = 0; + size_t n = 0; + for (auto it = first; it != last; ++it) + { + l1 += *it; + l2 += (*it)*(*it); + n += 1; + } + + double rootn = sqrt(n); + return (rootn - l1/sqrt(l2) )/ (rootn - 1); + } + else { + decltype(abs(*first)) l1 = 0; + decltype(abs(*first)) l2 = 0; + // We wouldn't need to count the elements if it was a random access iterator, + // but our only constraint is that it's a forward iterator. 
+ size_t n = 0; + for (auto it = first; it != last; ++it) + { + decltype(abs(*first)) tmp = abs(*it); + l1 += tmp; + l2 += tmp*tmp; + n += 1; + } + if constexpr (std::is_integral::value) + { + double rootn = sqrt(n); + return (rootn - l1/sqrt(l2) )/ (rootn - 1); + } + else + { + decltype(abs(*first)) rootn = sqrt(static_cast(n)); + return (rootn - l1/sqrt(l2) )/ (rootn - 1); + } + } +} + +template +inline auto hoyer_sparsity(Container const & v) +{ + return boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); +} + + +template +auto oracle_snr(Container const & signal, Container const & noisy_signal) +{ + using Real = typename Container::value_type; + BOOST_ASSERT_MSG(signal.size() == noisy_signal.size(), + "Signal and noisy_signal must be have the same number of elements."); + if constexpr (std::is_integral::value) + { + double numerator = 0; + double denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + numerator += signal[i]*signal[i]; + denominator += (noisy_signal[i] - signal[i])*(noisy_signal[i] - signal[i]); + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits::infinity(); + } + return numerator/denominator; + } + else if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + + { + using std::norm; + typename Real::value_type numerator = 0; + typename Real::value_type denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + numerator += norm(signal[i]); + denominator += norm(noisy_signal[i] - signal[i]); + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits::infinity(); + } + + return numerator/denominator; + } + else + { + Real numerator = 0; + Real denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + numerator += signal[i]*signal[i]; + 
denominator += (signal[i] - noisy_signal[i])*(signal[i] - noisy_signal[i]); + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits::infinity(); + } + + return numerator/denominator; + } +} + +template +auto mean_invariant_oracle_snr(Container const & signal, Container const & noisy_signal) +{ + using Real = typename Container::value_type; + BOOST_ASSERT_MSG(signal.size() == noisy_signal.size(), "Signal and noisy signal must be have the same number of elements."); + + Real mu = boost::math::tools::mean(signal); + Real numerator = 0; + Real denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + Real tmp = signal[i] - mu; + numerator += tmp*tmp; + denominator += (signal[i] - noisy_signal[i])*(signal[i] - noisy_signal[i]); + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits::infinity(); + } + + return numerator/denominator; + +} + +template +auto mean_invariant_oracle_snr_db(Container const & signal, Container const & noisy_signal) +{ + using std::log10; + return 10*log10(boost::math::tools::mean_invariant_oracle_snr(signal, noisy_signal)); +} + + +// Follows the definition of SNR given in Mallat, A Wavelet Tour of Signal Processing, equation 11.16. +template +auto oracle_snr_db(Container const & signal, Container const & noisy_signal) +{ + using std::log10; + return 10*log10(boost::math::tools::oracle_snr(signal, noisy_signal)); +} + +// A good reference on the M2M4 estimator: +// D. R. Pauluzzi and N. C. Beaulieu, "A comparison of SNR estimation techniques for the AWGN channel," IEEE Trans. Communications, Vol. 48, No. 10, pp. 1681-1691, 2000. 
+// A nice python implementation: +// https://github.com/gnuradio/gnuradio/blob/master/gr-digital/examples/snr_estimators.py +template +auto m2m4_snr_estimator(ForwardIterator first, ForwardIterator last, decltype(*first) estimated_signal_kurtosis=1, decltype(*first) estimated_noise_kurtosis=3) +{ + BOOST_ASSERT_MSG(estimated_signal_kurtosis > 0, "The estimated signal kurtosis must be positive"); + BOOST_ASSERT_MSG(estimated_noise_kurtosis > 0, "The estimated noise kurtosis must be positive."); + using Real = typename std::iterator_traits::value_type; + using std::sqrt; + if constexpr (std::is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + // If we first eliminate N, we obtain the quadratic equation: + // (ka+kw-6)S^2 + 2M2(3-kw)S + kw*M2^2 - M4 = 0 =: a*S^2 + bs*N + cs = 0 + // If we first eliminate S, we obtain the quadratic equation: + // (ka+kw-6)N^2 + 2M2(3-ka)N + ka*M2^2 - M4 = 0 =: a*N^2 + bn*N + cn = 0 + // I believe these equations are totally independent quadratics; + // if one has a complex solution it is not necessarily the case that the other must also. + // However, I can't prove that, so there is a chance that this does unnecessary work. + // Future improvements: There are algorithms which can solve quadratics much more effectively than the naive implementation found here. + // See: https://stackoverflow.com/questions/48979861/numerically-stable-method-for-solving-quadratic-equations/50065711#50065711 + auto [M1, M2, M3, M4] = boost::math::tools::first_four_moments(first, last); + if (M4 == 0) + { + // The signal is constant. 
There is no noise: + return std::numeric_limits::infinity(); + } + // Change to notation in Pauluzzi, equation 41: + auto kw = estimated_noise_kurtosis; + auto ka = estimated_signal_kurtosis; + // A common case, since it's the default: + Real a = (ka+kw-6); + Real bs = 2*M2*(3-kw); + Real cs = kw*M2*M2 - M4; + Real bn = 2*M2*(3-ka); + Real cn = ka*M2*M2 - M4; + auto [S0, S1] = boost::math::tools::quadratic_roots(a, bs, cs); + if (S1 > 0) + { + auto N = M2 - S1; + if (N > 0) + { + return S1/N; + } + if (S0 > 0) + { + N = M2 - S0; + if (N > 0) + { + return S0/N; + } + } + } + auto [N0, N1] = boost::math::tools::quadratic_roots(a, bn, cn); + if (N1 > 0) + { + auto S = M2 - N1; + if (S > 0) + { + return S/N1; + } + if (N0 > 0) + { + S = M2 - N0; + if (S > 0) + { + return S/N0; + } + } + } + // This happens distressingly often. It's a limitation of the method. + return std::numeric_limits::quiet_NaN(); + } + else + { + BOOST_ASSERT_MSG(false, "The M2M4 estimator has not been implemented for this type."); + return std::numeric_limits::quiet_NaN(); + } +} + +template +inline auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3) +{ + return m2m4_snr_estimator(noisy_signal.cbegin(), noisy_signal.cend(), estimated_signal_kurtosis, estimated_noise_kurtosis); +} + +template +inline auto m2m4_snr_estimator_db(ForwardIterator first, ForwardIterator last, decltype(*first) estimated_signal_kurtosis=1, decltype(*first) estimated_noise_kurtosis=3) +{ + using std::log10; + return 10*log10(m2m4_snr_estimator(first, last, estimated_signal_kurtosis, estimated_noise_kurtosis)); +} + + +template +inline auto m2m4_snr_estimator_db(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3) +{ + using std::log10; + return 10*log10(m2m4_snr_estimator(noisy_signal, 
estimated_signal_kurtosis, estimated_noise_kurtosis)); +} + +} +#endif diff --git a/include/boost/math/tools/univariate_statistics.hpp b/include/boost/math/tools/univariate_statistics.hpp new file mode 100644 index 0000000000..226fdf46d2 --- /dev/null +++ b/include/boost/math/tools/univariate_statistics.hpp @@ -0,0 +1,393 @@ +// (C) Copyright Nick Thompson 2018. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_UNIVARIATE_STATISTICS_HPP +#define BOOST_MATH_TOOLS_UNIVARIATE_STATISTICS_HPP + +#include +#include +#include +#include +#include + + +namespace boost::math::tools { + +template +auto mean(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean."); + if constexpr (std::is_integral::value) + { + double mu = 0; + double i = 1; + for(auto it = first; it != last; ++it) { + mu = mu + (*it - mu)/i; + i += 1; + } + return mu; + } + else + { + Real mu = 0; + Real i = 1; + for(auto it = first; it != last; ++it) { + mu = mu + (*it - mu)/i; + i += 1; + } + return mu; + } +} + +template +inline auto mean(Container const & v) +{ + return mean(v.cbegin(), v.cend()); +} + +template +auto variance(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance."); + // Higham, Accuracy and Stability, equation 1.6a and 1.6b: + if constexpr (std::is_integral::value) + { + double M = *first; + double Q = 0; + double k = 2; + for (auto it = std::next(first); it != last; ++it) + { + double tmp = *it - M; + Q = Q + ((k-1)*tmp*tmp)/k; + M = M + tmp/k; + k += 1; + } + return Q/(k-1); + } + else + { + Real M = *first; + Real Q = 0; + Real k = 2; + 
for (auto it = std::next(first); it != last; ++it) + { + Real tmp = (*it - M)/k; + Q += k*(k-1)*tmp*tmp; + M += tmp; + k += 1; + } + return Q/(k-1); + } +} + +template +inline auto variance(Container const & v) +{ + return variance(v.cbegin(), v.cend()); +} + +template +auto sample_variance(ForwardIterator first, ForwardIterator last) +{ + size_t n = std::distance(first, last); + BOOST_ASSERT_MSG(n > 1, "At least two samples are required to compute the sample variance."); + return n*variance(first, last)/(n-1); +} + +template +inline auto sample_variance(Container const & v) +{ + return sample_variance(v.cbegin(), v.cend()); +} + + +// Follows equation 1.5 of: +// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf +template +auto skewness(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute skewness."); + if constexpr (std::is_integral::value) + { + double M1 = *first; + double M2 = 0; + double M3 = 0; + double n = 2; + for (auto it = std::next(first); it != last; ++it) + { + double delta21 = *it - M1; + double tmp = delta21/n; + M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 = M2 + tmp*(n-1)*delta21; + M1 = M1 + tmp; + n += 1; + } + + double var = M2/(n-1); + if (var == 0) + { + // The limit is technically undefined, but the interpretation here is clear: + // A constant dataset has no skewness. 
+ return double(0); + } + double skew = M3/(M2*sqrt(var)); + return skew; + } + else + { + Real M1 = *first; + Real M2 = 0; + Real M3 = 0; + Real n = 2; + for (auto it = std::next(first); it != last; ++it) + { + Real delta21 = *it - M1; + Real tmp = delta21/n; + M3 += tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 += tmp*(n-1)*delta21; + M1 += tmp; + n += 1; + } + + Real var = M2/(n-1); + if (var == 0) + { + // The limit is technically undefined, but the interpretation here is clear: + // A constant dataset has no skewness. + return Real(0); + } + Real skew = M3/(M2*sqrt(var)); + return skew; + } +} + +template +inline auto skewness(Container const & v) +{ + return skewness(v.cbegin(), v.cend()); +} + +// Follows equation 1.5/1.6 of: +// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf +template +auto first_four_moments(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the first four moments."); + if constexpr (std::is_integral::value) + { + double M1 = *first; + double M2 = 0; + double M3 = 0; + double M4 = 0; + double n = 2; + for (auto it = std::next(first); it != last; ++it) + { + double delta21 = *it - M1; + double tmp = delta21/n; + M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3); + M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 = M2 + tmp*(n-1)*delta21; + M1 = M1 + tmp; + n += 1; + } + + return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1)); + } + else + { + Real M1 = *first; + Real M2 = 0; + Real M3 = 0; + Real M4 = 0; + Real n = 2; + for (auto it = std::next(first); it != last; ++it) + { + Real delta21 = *it - M1; + Real tmp = delta21/n; + M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3); + M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 = M2 + tmp*(n-1)*delta21; + M1 = M1 + tmp; + n += 1; + } + + return std::make_tuple(M1, M2/(n-1), M3/(n-1), 
M4/(n-1)); + } +} + +template +inline auto first_four_moments(Container const & v) +{ + return first_four_moments(v.cbegin(), v.cend()); +} + + +// Follows equation 1.6 of: +// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf +template +auto kurtosis(ForwardIterator first, ForwardIterator last) +{ + auto [M1, M2, M3, M4] = first_four_moments(first, last); + if (M2 == 0) + { + return M2; + } + return M4/(M2*M2); +} + +template +inline auto kurtosis(Container const & v) +{ + return kurtosis(v.cbegin(), v.cend()); +} + +template +auto excess_kurtosis(ForwardIterator first, ForwardIterator last) +{ + return kurtosis(first, last) - 3; +} + +template +inline auto excess_kurtosis(Container const & v) +{ + return excess_kurtosis(v.cbegin(), v.cend()); +} + + +template +auto median(RandomAccessIterator first, RandomAccessIterator last) +{ + size_t num_elems = std::distance(first, last); + BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined."); + if (num_elems & 1) + { + auto middle = first + (num_elems - 1)/2; + std::nth_element(first, middle, last); + return *middle; + } + else + { + auto middle = first + num_elems/2 - 1; + std::nth_element(first, middle, last); + std::nth_element(middle, middle+1, last); + return (*middle + *(middle+1))/2; + } +} + + +template +inline auto median(RandomAccessContainer & v) +{ + return median(v.begin(), v.end()); +} + +template +auto gini_coefficient(RandomAccessIterator first, RandomAccessIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); + + std::sort(first, last); + if constexpr (std::is_integral::value) + { + double i = 1; + double num = 0; + double denom = 0; + for (auto it = first; it != last; ++it) + { + num += *it*i; + denom += *it; + ++i; + } + + // If the l1 norm is zero, all elements are zero, so every element is the 
same. + if (denom == 0) + { + return double(0); + } + + return ((2*num)/denom - i)/(i-1); + } + else + { + Real i = 1; + Real num = 0; + Real denom = 0; + for (auto it = first; it != last; ++it) + { + num += *it*i; + denom += *it; + ++i; + } + + // If the l1 norm is zero, all elements are zero, so every element is the same. + if (denom == 0) + { + return Real(0); + } + + return ((2*num)/denom - i)/(i-1); + } +} + +template +inline auto gini_coefficient(RandomAccessContainer & v) +{ + return gini_coefficient(v.begin(), v.end()); +} + +template +inline auto sample_gini_coefficient(RandomAccessIterator first, RandomAccessIterator last) +{ + size_t n = std::distance(first, last); + return n*gini_coefficient(first, last)/(n-1); +} + +template +inline auto sample_gini_coefficient(RandomAccessContainer & v) +{ + return sample_gini_coefficient(v.begin(), v.end()); +} + +template +auto median_absolute_deviation(RandomAccessIterator first, RandomAccessIterator last, typename std::iterator_traits::value_type center=std::numeric_limits::value_type>::quiet_NaN()) +{ + using std::abs; + using Real = typename std::iterator_traits::value_type; + using std::isnan; + if (isnan(center)) + { + center = boost::math::tools::median(first, last); + } + size_t num_elems = std::distance(first, last); + BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined."); + auto comparator = [¢er](Real a, Real b) { return abs(a-center) < abs(b-center);}; + if (num_elems & 1) + { + auto middle = first + (num_elems - 1)/2; + std::nth_element(first, middle, last, comparator); + return abs(*middle); + } + else + { + auto middle = first + num_elems/2 - 1; + std::nth_element(first, middle, last, comparator); + std::nth_element(middle, middle+1, last, comparator); + return (abs(*middle) + abs(*(middle+1)))/abs(static_cast(2)); + } +} + +template +inline auto median_absolute_deviation(RandomAccessContainer & v, typename RandomAccessContainer::value_type 
center=std::numeric_limits::quiet_NaN()) +{ + return median_absolute_deviation(v.begin(), v.end(), center); +} + +} +#endif diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 5ba495f9c9..881c3a7096 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -902,6 +902,10 @@ test-suite misc : [ run test_constant_generate.cpp : : : release USE_CPP_FLOAT=1 off:no ] [ run test_cubic_b_spline.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions ] off msvc:/bigobj release ] [ run catmull_rom_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] # does not in fact require C++17 constexpr; requires C++17 std::size. + [ run univariate_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] + [ run norms_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] + [ run signal_statistics_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] + [ run bivariate_statistics_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run test_real_concept.cpp ../../test/build//boost_unit_test_framework ] [ run test_remez.cpp pch ../../test/build//boost_unit_test_framework ] [ run test_roots.cpp pch ../../test/build//boost_unit_test_framework ] diff --git a/test/bivariate_statistics_test.cpp b/test/bivariate_statistics_test.cpp new file mode 100644 index 0000000000..528df4cd25 --- /dev/null +++ b/test/bivariate_statistics_test.cpp @@ -0,0 +1,170 @@ +/* + * (C) Copyright Nick Thompson 2018. + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. 
(See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using boost::multiprecision::cpp_bin_float_50; +using boost::multiprecision::cpp_complex_50; + +/* + * Test checklist: + * 1) Does it work with multiprecision? + * 2) Does it work with .cbegin()/.cend() if the data is not altered? + * 3) Does it work with ublas and std::array? (Checking Eigen and Armadillo will make the CI system really unhappy.) + * 4) Does it work with std::forward_list if a forward iterator is all that is required? + * 5) Does it work with complex data if complex data is sensible? + */ + +using boost::math::tools::means_and_covariance; +using boost::math::tools::covariance; + +template +void test_covariance() +{ + std::cout << std::setprecision(std::numeric_limits::digits10+1); + Real tol = std::numeric_limits::epsilon(); + using std::abs; + + // Covariance of a single thing is zero: + std::array u1{8}; + std::array v1{17}; + auto [mu_u1, mu_v1, cov1] = means_and_covariance(u1, v1); + + BOOST_TEST(abs(cov1) < tol); + BOOST_TEST(abs(mu_u1 - 8) < tol); + BOOST_TEST(abs(mu_v1 - 17) < tol); + + + std::array u2{8, 4}; + std::array v2{3, 7}; + auto [mu_u2, mu_v2, cov2] = means_and_covariance(u2, v2); + + BOOST_TEST(abs(cov2+4) < tol); + BOOST_TEST(abs(mu_u2 - 6) < tol); + BOOST_TEST(abs(mu_v2 - 5) < tol); + + std::vector u3{1,2,3}; + std::vector v3{1,1,1}; + + auto [mu_u3, mu_v3, cov3] = means_and_covariance(u3, v3); + + // Since v is constant, covariance(u,v) = 0 against everything any u: + BOOST_TEST(abs(cov3) < tol); + BOOST_TEST(abs(mu_u3 - 2) < tol); + BOOST_TEST(abs(mu_v3 - 1) < tol); + // Make sure we pull the correct symbol out of means_and_covariance: + cov3 = covariance(u3, v3); + BOOST_TEST(abs(cov3) < tol); + + cov3 = covariance(v3, u3); + // Covariance is symmetric: cov(u,v) = cov(v,u) + 
BOOST_TEST(abs(cov3) < tol); + + // cov(u,u) = sigma(u)^2: + cov3 = covariance(u3, u3); + Real expected = Real(2)/Real(3); + + BOOST_TEST(abs(cov3 - expected) < tol); + + std::mt19937 gen(15); + // Can't template standard library on multiprecision, so use double and cast back: + std::uniform_real_distribution dis(-1.0, 1.0); + std::vector u(500); + std::vector v(500); + for(size_t i = 0; i < u.size(); ++i) + { + u[i] = (Real) dis(gen); + v[i] = (Real) dis(gen); + } + + Real mu_u = boost::math::tools::mean(u); + Real mu_v = boost::math::tools::mean(v); + Real sigma_u_sq = boost::math::tools::variance(u); + Real sigma_v_sq = boost::math::tools::variance(v); + + auto [mu_u_, mu_v_, cov_uv] = means_and_covariance(u, v); + BOOST_TEST(abs(mu_u - mu_u_) < tol); + BOOST_TEST(abs(mu_v - mu_v_) < tol); + + // Cauchy-Schwartz inequality: + BOOST_TEST(cov_uv*cov_uv <= sigma_u_sq*sigma_v_sq); + // cov(X, X) = sigma(X)^2: + Real cov_uu = covariance(u, u); + BOOST_TEST(abs(cov_uu - sigma_u_sq) < tol); + Real cov_vv = covariance(v, v); + BOOST_TEST(abs(cov_vv - sigma_v_sq) < tol); + +} + +template +void test_correlation_coefficient() +{ + using boost::math::tools::correlation_coefficient; + + Real tol = std::numeric_limits::epsilon(); + std::vector u{1}; + std::vector v{1}; + Real rho_uv = correlation_coefficient(u, v); + BOOST_TEST(abs(rho_uv - 1) < tol); + + u = {1,1}; + v = {1,1}; + rho_uv = correlation_coefficient(u, v); + BOOST_TEST(abs(rho_uv - 1) < tol); + + u = {1, 2, 3}; + v = {1, 2, 3}; + rho_uv = correlation_coefficient(u, v); + BOOST_TEST(abs(rho_uv - 1) < tol); + + u = {1, 2, 3}; + v = {-1, -2, -3}; + rho_uv = correlation_coefficient(u, v); + BOOST_TEST(abs(rho_uv + 1) < tol); + + rho_uv = correlation_coefficient(v, u); + BOOST_TEST(abs(rho_uv + 1) < tol); + + u = {1, 2, 3}; + v = {0, 0, 0}; + rho_uv = correlation_coefficient(v, u); + BOOST_TEST(abs(rho_uv) < tol); + + u = {1, 2, 3}; + v = {0, 0, 3}; + rho_uv = correlation_coefficient(v, u); + // mu_u = 2, sigma_u^2 = 
2/3, mu_v = 1, sigma_v^2 = 2, cov(u,v) = 1. + BOOST_TEST(abs(rho_uv - sqrt(Real(3))/Real(2)) < tol); +} + +int main() +{ + test_covariance(); + test_covariance(); + test_covariance(); + test_covariance(); + + test_correlation_coefficient(); + test_correlation_coefficient(); + test_correlation_coefficient(); + test_correlation_coefficient(); + + return boost::report_errors(); +} diff --git a/test/norms_test.cpp b/test/norms_test.cpp new file mode 100644 index 0000000000..930d96ea19 --- /dev/null +++ b/test/norms_test.cpp @@ -0,0 +1,923 @@ +/* + * (C) Copyright Nick Thompson 2018. + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using std::abs; +using std::pow; +using std::sqrt; +using boost::multiprecision::cpp_bin_float_50; +using boost::multiprecision::cpp_complex_50; +using boost::math::tools::lp_norm; +using boost::math::tools::l1_norm; +using boost::math::tools::l2_norm; +using boost::math::tools::sup_norm; +using boost::math::tools::lp_distance; +using boost::math::tools::l1_distance; +using boost::math::tools::l2_distance; +using boost::math::tools::sup_distance; +using boost::math::tools::total_variation; + +/* + * Test checklist: + * 1) Does it work with multiprecision? + * 2) Does it work with .cbegin()/.cend() if the data is not altered? + * 3) Does it work with ublas and std::array? (Checking Eigen and Armadillo will make the CI system really unhappy.) + * 4) Does it work with std::forward_list if a forward iterator is all that is required? + * 5) Does it work with complex data if complex data is sensible? + */ + +// To stress test, set global_seed = 0, global_size = huge. 
+static const constexpr size_t global_seed = 834; +static const constexpr size_t global_size = 64; + +template +std::vector generate_random_vector(size_t size, size_t seed) +{ + if (seed == 0) + { + std::random_device rd; + seed = rd(); + } + std::vector v(size); + + std::mt19937 gen(seed); + + if constexpr (std::is_floating_point::value) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = dis(gen); + } + return v; + } + else if constexpr (std::is_integral::value) + { + // Rescaling by larger than 2 is UB! + std::uniform_int_distribution dis(std::numeric_limits::lowest()/2, std::numeric_limits::max()/2); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = dis(gen); + } + return v; + } + else if constexpr (boost::is_complex::value) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = {dis(gen), dis(gen)}; + } + return v; + } + else if constexpr (boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = {dis(gen), dis(gen)}; + } + return v; + } + else if constexpr (boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = dis(gen); + } + return v; + } + else + { + BOOST_ASSERT_MSG(false, "Could not identify type for random vector generation."); + return v; + } +} + + +template +void test_lp() +{ + Real tol = 50*std::numeric_limits::epsilon(); + + std::array u{1,0,0}; + Real l3 = lp_norm(u.begin(), u.end(), 3); + BOOST_TEST(abs(l3 - 1) < tol); + + u[0] = -8; + l3 = lp_norm(u.cbegin(), u.cend(), 3); + BOOST_TEST(abs(l3 - 8) < tol); + + std::vector v(500); + for (size_t i = 0; i < v.size(); ++i) { + v[i] = 7; + } + Real l8 = lp_norm(v, 8); + Real expected = 7*pow(v.size(), static_cast(1)/static_cast(8)); + BOOST_TEST(abs(l8 
- expected) < tol*abs(expected)); + + // Does it work with ublas vectors? + // Does it handle the overflow of intermediates? + boost::numeric::ublas::vector w(4); + Real bignum = sqrt(std::numeric_limits::max())/256; + for (size_t i = 0; i < w.size(); ++i) + { + w[i] = bignum; + } + Real l20 = lp_norm(w.cbegin(), w.cend(), 4); + expected = bignum*pow(w.size(), static_cast(1)/static_cast(4)); + BOOST_TEST(abs(l20 - expected) < tol*expected); + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real l7 = scale*lp_norm(v, 7); + for (auto & x : v) + { + x *= -scale; + } + Real l7_ = lp_norm(v, 7); + BOOST_TEST(abs(l7_ - l7) < tol*l7); +} + + +template +void test_complex_lp() +{ + typedef typename Complex::value_type Real; + Real tol = 50*std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,0}, {0,0}}; + Real l3 = lp_norm(v.cbegin(), v.cend(), 3); + BOOST_TEST(abs(l3 - 1) < tol); + + l3 = lp_norm(v, 3); + BOOST_TEST(abs(l3 - 1) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real l7 = scale*lp_norm(v, 7); + for (auto & x : v) + { + x *= -scale; + } + Real l7_ = lp_norm(v, 7); + BOOST_TEST(abs(l7_ - l7) < tol*l7); +} + +template +void test_integer_lp() +{ + double tol = 100*std::numeric_limits::epsilon(); + + std::array u{1,0,0}; + double l3 = lp_norm(u.begin(), u.end(), 3); + BOOST_TEST(abs(l3 - 1) < tol); + + auto v = generate_random_vector(global_size, global_seed); + Z scale = 2; + double l7 = scale*lp_norm(v, 7); + for (auto & x : v) + { + x *= scale; + } + double l7_ = lp_norm(v, 7); + BOOST_TEST(abs(l7_ - l7) < tol*l7); +} + +template +void test_lp_distance() +{ + Real tol = 100*std::numeric_limits::epsilon(); + + std::vector u{1,0,0}; + std::vector v{0,0,0}; + + Real dist = lp_distance(u,u, 3); + BOOST_TEST(abs(dist) < tol); + + dist = lp_distance(u,v, 3); + BOOST_TEST(abs(dist - 1) < tol); + + v = generate_random_vector(global_size, global_seed); + u = generate_random_vector(global_size, 
global_seed+1); + Real dist1 = lp_distance(u, v, 7); + Real dist2 = lp_distance(v, u, 7); + + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); +} + +template +void test_complex_lp_distance() +{ + using Real = typename Complex::value_type; + Real tol = 100*std::numeric_limits::epsilon(); + + std::vector u{{1,0},{0,0},{0,0}}; + std::vector v{{0,0},{0,0},{0,0}}; + + Real dist = boost::math::tools::lp_distance(u,u, 3); + BOOST_TEST(abs(dist) < tol); + + dist = boost::math::tools::lp_distance(u,v, 3); + BOOST_TEST(abs(dist - 1) < tol); + + v = generate_random_vector(global_size, global_seed); + u = generate_random_vector(global_size, global_seed + 1); + Real dist1 = lp_distance(u, v, 7); + Real dist2 = lp_distance(v, u, 7); + + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); +} + +template +void test_integer_lp_distance() +{ + double tol = 100*std::numeric_limits::epsilon(); + + std::array u{1,0,0}; + std::array w{0,0,0}; + double l3 = lp_distance(u, w, 3); + BOOST_TEST(abs(l3 - 1) < tol); + + auto v = generate_random_vector(global_size, global_seed); + Z scale = 2; + for (auto & x : v) + { + x *= scale; + } + auto s = generate_random_vector(global_size, global_seed + 1); + double dist1 = lp_distance(v, s, 7); + double dist2 = lp_distance(s, v, 7); + BOOST_TEST(abs(dist1 - dist2) < tol*dist2); +} + + +template +void test_integer_total_variation() +{ + double eps = std::numeric_limits::epsilon(); + std::vector v{1,1}; + double tv = boost::math::tools::total_variation(v); + BOOST_TEST_EQ(tv, 0); + + v[1] = 2; + tv = boost::math::tools::total_variation(v.begin(), v.end()); + BOOST_TEST_EQ(tv, 1); + + v.resize(16); + for (size_t i = 0; i < v.size(); ++i) { + v[i] = i; + } + + tv = boost::math::tools::total_variation(v); + BOOST_TEST_EQ(tv, v.size() -1); + + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = i*i; + } + + tv = boost::math::tools::total_variation(v); + BOOST_TEST_EQ(tv, (v.size() - 1)*(v.size() - 1)); + + // Work with std::array? 
+ std::array w{1,1}; + tv = boost::math::tools::total_variation(w); + BOOST_TEST_EQ(tv,0); + + std::array u{1, 2, 1, 2}; + tv = boost::math::tools::total_variation(u); + BOOST_TEST_EQ(tv, 3); + + v = generate_random_vector(global_size, global_seed); + double tv1 = 2*total_variation(v); + Z scale = 2; + for (auto & x : v) + { + x *= scale; + } + double tv2 = total_variation(v); + BOOST_TEST(abs(tv1 - tv2) < tv1*eps); +} + +template +void test_total_variation() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1}; + Real tv = total_variation(v.begin(), v.end()); + BOOST_TEST(tv >= 0 && abs(tv) < tol); + + tv = total_variation(v); + BOOST_TEST(tv >= 0 && abs(tv) < tol); + + v[1] = 2; + tv = total_variation(v.begin(), v.end()); + BOOST_TEST(abs(tv - 1) < tol); + + v.resize(50); + for (size_t i = 0; i < v.size(); ++i) { + v[i] = i; + } + + tv = total_variation(v.begin(), v.end()); + BOOST_TEST(abs(tv - (v.size() -1)) < tol); + + for (size_t i = 0; i < v.size(); ++i) { + v[i] = i*i; + } + + tv = total_variation(v.begin(), v.end()); + BOOST_TEST(abs(tv - (v.size() - 1)*(v.size() - 1)) < tol); + + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real tv1 = scale*total_variation(v); + for (auto & x : v) + { + x *= -scale; + } + Real tv2 = total_variation(v); + BOOST_TEST(abs(tv1 - tv2) < tol*tv1); +} + +template +void test_sup_norm() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{-2,1,0}; + Real s = boost::math::tools::sup_norm(v.begin(), v.end()); + BOOST_TEST(abs(s - 2) < tol); + + s = boost::math::tools::sup_norm(v); + BOOST_TEST(abs(s - 2) < tol); + + // Work with std::array? 
+ std::array w{-2,1,0}; + s = boost::math::tools::sup_norm(w); + BOOST_TEST(abs(s - 2) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real sup1 = scale*sup_norm(v); + for (auto & x : v) + { + x *= -scale; + } + Real sup2 = sup_norm(v); + BOOST_TEST(abs(sup1 - sup2) < tol*sup1); +} + +template +void test_integer_sup_norm() +{ + double eps = std::numeric_limits::epsilon(); + std::vector v{2,1,0}; + Z s = sup_norm(v.begin(), v.end()); + BOOST_TEST_EQ(s, 2); + + s = sup_norm(v); + BOOST_TEST_EQ(s,2); + + v = generate_random_vector(global_size, global_seed); + double sup1 = 2*sup_norm(v); + Z scale = 2; + for (auto & x : v) + { + x *= scale; + } + double sup2 = sup_norm(v); + BOOST_TEST(abs(sup1 - sup2) < sup1*eps); +} + +template +void test_complex_sup_norm() +{ + typedef typename Complex::value_type Real; + Real tol = std::numeric_limits::epsilon(); + std::vector w{{0,-8}, {1,1}, {3,2}}; + Real s = sup_norm(w.cbegin(), w.cend()); + BOOST_TEST(abs(s-8) < tol); + + s = sup_norm(w); + BOOST_TEST(abs(s-8) < tol); + + auto v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real sup1 = scale*sup_norm(v); + for (auto & x : v) + { + x *= -scale; + } + Real sup2 = sup_norm(v); + BOOST_TEST(abs(sup1 - sup2) < tol*sup1); +} + +template +void test_l0_pseudo_norm() +{ + std::vector v{0,0,1}; + size_t count = boost::math::tools::l0_pseudo_norm(v.begin(), v.end()); + BOOST_TEST_EQ(count, 1); + + // Compiles with cbegin()/cend()? 
+ count = boost::math::tools::l0_pseudo_norm(v.cbegin(), v.cend()); + BOOST_TEST_EQ(count, 1); + + count = boost::math::tools::l0_pseudo_norm(v); + BOOST_TEST_EQ(count, 1); + + std::array w{0,0,1}; + count = boost::math::tools::l0_pseudo_norm(w); + BOOST_TEST_EQ(count, 1); +} + +template +void test_complex_l0_pseudo_norm() +{ + std::vector v{{0,0}, {0,0}, {1,0}}; + size_t count = boost::math::tools::l0_pseudo_norm(v.begin(), v.end()); + BOOST_TEST_EQ(count, 1); + + count = boost::math::tools::l0_pseudo_norm(v); + BOOST_TEST_EQ(count, 1); +} + +template +void test_hamming_distance() +{ + std::vector v{1,2,3}; + std::vector w{1,2,4}; + size_t count = boost::math::tools::hamming_distance(v, w); + BOOST_TEST_EQ(count, 1); + + count = boost::math::tools::hamming_distance(v, v); + BOOST_TEST_EQ(count, 0); +} + +template +void test_l1_norm() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + Real l1 = l1_norm(v.begin(), v.end()); + BOOST_TEST(abs(l1 - 3) < tol); + + l1 = l1_norm(v); + BOOST_TEST(abs(l1 - 3) < tol); + + std::array w{1,1,1}; + l1 = l1_norm(w); + BOOST_TEST(abs(l1 - 3) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real l1_1 = scale*l1_norm(v); + for (auto & x : v) + { + x *= -scale; + } + Real l1_2 = l1_norm(v); + BOOST_TEST(abs(l1_1 - l1_2) < tol*l1_1); +} + +template +void test_integer_l1_norm() +{ + double eps = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + Z l1 = boost::math::tools::l1_norm(v.begin(), v.end()); + BOOST_TEST_EQ(l1, 3); + + v = generate_random_vector(global_size, global_seed); + double l1_1 = 2*l1_norm(v); + Z scale = 2; + for (auto & x : v) + { + x *= scale; + } + double l1_2 = l1_norm(v); + BOOST_TEST(l1_1 > 0); + BOOST_TEST(l1_2 > 0); + if (abs(l1_1 - l1_2) > 2*l1_1*eps) + { + std::cout << std::setprecision(std::numeric_limits::digits10); + std::cout << "L1_1 = " << l1_1 << "\n"; + std::cout << "L1_2 = " << l1_2 << "\n"; + BOOST_TEST(abs(l1_1 - l1_2) < 
2*l1_1*eps); + } +} + +template +void test_complex_l1_norm() +{ + typedef typename Complex::value_type Real; + Real tol = std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,1},{0,-1}}; + Real l1 = l1_norm(v.begin(), v.end()); + BOOST_TEST(abs(l1 - 3) < tol); + + l1 = l1_norm(v); + BOOST_TEST(abs(l1 - 3) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real l1_1 = scale*l1_norm(v); + for (auto & x : v) + { + x *= -scale; + } + Real l1_2 = l1_norm(v); + BOOST_TEST(abs(l1_1 - l1_2) < tol*l1_1); +} + +template +void test_l1_distance() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,2,3}; + std::vector w{1,1,1}; + Real l1 = boost::math::tools::l1_distance(v, v); + BOOST_TEST(abs(l1) < tol); + + l1 = boost::math::tools::l1_distance(w, v); + BOOST_TEST(abs(l1 - 3) < tol); + + l1 = boost::math::tools::l1_distance(v, w); + BOOST_TEST(abs(l1 - 3) < tol); + + v = generate_random_vector(global_size, global_seed); + w = generate_random_vector(global_size, global_seed+1); + Real dist1 = l1_distance(v, w); + Real dist2 = l1_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); +} + +template +void test_integer_l1_distance() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,2,3}; + std::vector w{1,1,1}; + double l1 = boost::math::tools::l1_distance(v, v); + BOOST_TEST(abs(l1) < tol); + + l1 = boost::math::tools::l1_distance(w, v); + BOOST_TEST(abs(l1 - 3) < tol); + + l1 = boost::math::tools::l1_distance(v, w); + BOOST_TEST(abs(l1 - 3) < tol); + + v = generate_random_vector(global_size, global_seed); + w = generate_random_vector(global_size, global_seed + 1); + double dist1 = l1_distance(v, w); + double dist2 = l1_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); +} + +template +void test_complex_l1_distance() +{ + typedef typename Complex::value_type Real; + Real tol = std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,1},{0,-1}}; + Real l1 = 
boost::math::tools::l1_distance(v, v); + BOOST_TEST(abs(l1) < tol); + + std::vector w{{2,0}, {0,1},{0,-1}}; + l1 = boost::math::tools::l1_distance(v.cbegin(), v.cend(), w.cbegin()); + BOOST_TEST(abs(l1 - 1) < tol); + + v = generate_random_vector(global_size, global_seed); + w = generate_random_vector(global_size, global_seed + 1); + Real dist1 = l1_distance(v, w); + Real dist2 = l1_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); +} + + +template +void test_l2_norm() +{ + using std::sqrt; + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1}; + Real l2 = boost::math::tools::l2_norm(v.begin(), v.end()); + BOOST_TEST(abs(l2 - 2) < tol); + + l2 = boost::math::tools::l2_norm(v); + BOOST_TEST(abs(l2 - 2) < tol); + + std::array w{1,1,1,1}; + l2 = boost::math::tools::l2_norm(w); + BOOST_TEST(abs(l2 - 2) < tol); + + Real bignum = 4*sqrt(std::numeric_limits::max()); + v[0] = bignum; + v[1] = 0; + v[2] = 0; + v[3] = 0; + l2 = boost::math::tools::l2_norm(v.begin(), v.end()); + BOOST_TEST(abs(l2 - bignum) < tol*l2); + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real l2_1 = scale*l2_norm(v); + for (auto & x : v) + { + x *= -scale; + } + Real l2_2 = l2_norm(v); + BOOST_TEST(l2_1 > 0); + BOOST_TEST(l2_2 > 0); + BOOST_TEST(abs(l2_1 - l2_2) < tol*l2_1); +} + +template +void test_integer_l2_norm() +{ + double tol = 100*std::numeric_limits::epsilon(); + std::vector v{1,1,1,1}; + double l2 = boost::math::tools::l2_norm(v.begin(), v.end()); + BOOST_TEST(abs(l2 - 2) < tol); + + v = generate_random_vector(global_size, global_seed); + Z scale = 2; + double l2_1 = scale*l2_norm(v); + for (auto & x : v) + { + x *= scale; + } + double l2_2 = l2_norm(v); + BOOST_TEST(l2_1 > 0); + BOOST_TEST(l2_2 > 0); + BOOST_TEST(abs(l2_1 - l2_2) < tol*l2_1); +} + +template +void test_complex_l2_norm() +{ + typedef typename Complex::value_type Real; + Real tol = 100*std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,1},{0,-1}, {1,0}}; + Real 
l2 = boost::math::tools::l2_norm(v.begin(), v.end()); + BOOST_TEST(abs(l2 - 2) < tol); + + l2 = boost::math::tools::l2_norm(v); + BOOST_TEST(abs(l2 - 2) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real l2_1 = scale*l2_norm(v); + for (auto & x : v) + { + x *= -scale; + } + Real l2_2 = l2_norm(v); + BOOST_TEST(abs(l2_1 - l2_2) < tol*l2_1); +} + +template +void test_l2_distance() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1}; + Real l2 = boost::math::tools::l2_distance(v, v); + BOOST_TEST(abs(l2) < tol); + + v = generate_random_vector(global_size, global_seed); + auto w = generate_random_vector(global_size, global_seed + 1); + Real dist1 = l2_distance(v, w); + Real dist2 = l2_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); +} + + +template +void test_integer_l2_distance() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1}; + double l2 = boost::math::tools::l2_distance(v, v); + BOOST_TEST(abs(l2) < tol); + + v = generate_random_vector(global_size, global_seed); + auto w = generate_random_vector(global_size, global_seed + 1); + double dist1 = l2_distance(v, w); + double dist2 = l2_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); +} + +template +void test_complex_l2_distance() +{ + typedef typename Complex::value_type Real; + Real tol = 100*std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,1},{0,-1}, {1,0}}; + Real l2 = boost::math::tools::l2_distance(v, v); + BOOST_TEST(abs(l2) < tol); + + v = generate_random_vector(global_size, global_seed); + auto w = generate_random_vector(global_size, global_seed + 1); + Real dist1 = l2_distance(v, w); + Real dist2 = l2_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); +} + +template +void test_sup_distance() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1}; + std::vector w{0,0,0,0}; + Real sup = boost::math::tools::sup_distance(v, v); + BOOST_TEST(abs(sup) < tol); + sup 
= boost::math::tools::sup_distance(v, w); + BOOST_TEST(abs(sup -1) < tol); + + v = generate_random_vector(global_size, global_seed); + w = generate_random_vector(global_size, global_seed + 1); + Real dist1 = sup_distance(v, w); + Real dist2 = sup_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); +} + + +template +void test_integer_sup_distance() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1}; + std::vector w{0,0,0,0}; + double sup = boost::math::tools::sup_distance(v, v); + BOOST_TEST(abs(sup) < tol); + + sup = boost::math::tools::sup_distance(v, w); + BOOST_TEST(abs(sup -1) < tol); + + v = generate_random_vector(global_size, global_seed); + w = generate_random_vector(global_size, global_seed + 1); + double dist1 = sup_distance(v, w); + double dist2 = sup_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); +} + +template +void test_complex_sup_distance() +{ + typedef typename Complex::value_type Real; + Real tol = 100*std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,1},{0,-1}, {1,0}}; + Real sup = boost::math::tools::sup_distance(v, v); + BOOST_TEST(abs(sup) < tol); + + v = generate_random_vector(global_size, global_seed); + auto w = generate_random_vector(global_size, global_seed + 1); + Real dist1 = sup_distance(v, w); + Real dist2 = sup_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); +} + +int main() +{ + test_l0_pseudo_norm(); + test_l0_pseudo_norm(); + test_l0_pseudo_norm(); + test_l0_pseudo_norm(); + test_l0_pseudo_norm(); + test_l0_pseudo_norm(); + test_l0_pseudo_norm(); + + test_complex_l0_pseudo_norm>(); + test_complex_l0_pseudo_norm>(); + test_complex_l0_pseudo_norm>(); + test_complex_l0_pseudo_norm(); + + test_hamming_distance(); + test_hamming_distance(); + test_hamming_distance(); + + test_l1_norm(); + test_l1_norm(); + test_l1_norm(); + test_l1_norm(); + + test_integer_l1_norm(); + test_integer_l1_norm(); + test_integer_l1_norm(); + + test_complex_l1_norm>(); + 
test_complex_l1_norm>(); + test_complex_l1_norm>(); + test_complex_l1_norm(); + + test_l1_distance(); + test_l1_distance(); + + test_integer_l1_distance(); + test_integer_l1_distance(); + test_integer_l1_distance(); + + test_complex_l1_distance>(); + test_complex_l1_distance(); + + test_complex_l2_norm>(); + test_complex_l2_norm>(); + test_complex_l2_norm>(); + test_complex_l2_norm(); + + test_l2_norm(); + test_l2_norm(); + test_l2_norm(); + test_l2_norm(); + + test_integer_l2_norm(); + test_integer_l2_norm(); + test_integer_l2_norm(); + + test_l2_distance(); + test_l2_distance(); + + test_integer_l2_distance(); + test_integer_l2_distance(); + test_integer_l2_distance(); + + test_complex_l2_distance>(); + test_complex_l2_distance(); + + test_lp(); + test_lp(); + test_lp(); + test_lp(); + + test_complex_lp>(); + test_complex_lp>(); + test_complex_lp>(); + test_complex_lp(); + + test_integer_lp(); + test_integer_lp(); + test_integer_lp(); + + test_lp_distance(); + test_lp_distance(); + + test_complex_lp_distance>(); + test_complex_lp_distance(); + + test_integer_lp_distance(); + test_integer_lp_distance(); + test_integer_lp_distance(); + + test_sup_norm(); + test_sup_norm(); + test_sup_norm(); + test_sup_norm(); + + test_integer_sup_norm(); + test_integer_sup_norm(); + test_integer_sup_norm(); + + test_complex_sup_norm>(); + test_complex_sup_norm>(); + test_complex_sup_norm>(); + test_complex_sup_norm(); + + test_sup_distance(); + test_sup_distance(); + + test_integer_sup_distance(); + test_integer_sup_distance(); + test_integer_sup_distance(); + + test_complex_sup_distance>(); + test_complex_sup_distance(); + + test_total_variation(); + test_total_variation(); + test_total_variation(); + test_total_variation(); + + test_integer_total_variation(); + test_integer_total_variation(); + test_integer_total_variation(); + + return boost::report_errors(); +} diff --git a/test/signal_statistics_test.cpp b/test/signal_statistics_test.cpp new file mode 100644 index 
0000000000..963ab01656 --- /dev/null +++ b/test/signal_statistics_test.cpp @@ -0,0 +1,332 @@ +/* + * (C) Copyright Nick Thompson 2018. + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using std::abs; +using boost::multiprecision::cpp_bin_float_50; +using boost::multiprecision::cpp_complex_50; +using boost::math::constants::two_pi; + +/* + * Test checklist: + * 1) Does it work with multiprecision? + * 2) Does it work with .cbegin()/.cend() if the data is not altered? + * 3) Does it work with ublas and std::array? (Checking Eigen and Armadillo will make the CI system really unhappy.) + * 4) Does it work with std::forward_list if a forward iterator is all that is required? + * 5) Does it work with complex data if complex data is sensible? + * 6) Does it work with integer data if sensible? + */ + +template +void test_hoyer_sparsity() +{ + using std::sqrt; + Real tol = 5*std::numeric_limits::epsilon(); + std::vector v{1,0,0}; + Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); + BOOST_TEST(abs(hs - 1) < tol); + + hs = boost::math::tools::hoyer_sparsity(v); + BOOST_TEST(abs(hs - 1) < tol); + + // Does it work with constant iterators? + hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); + BOOST_TEST(abs(hs - 1) < tol); + + v[0] = 1; + v[1] = 1; + v[2] = 1; + hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); + BOOST_TEST(abs(hs) < tol); + + std::array w{1,1,1}; + hs = boost::math::tools::hoyer_sparsity(w); + BOOST_TEST(abs(hs) < tol); + + // Now some statistics: + // If x_i ~ Unif(0,1), E[x_i] = 1/2, E[x_i^2] = 1/3. 
+ // Therefore, E[||x||_1] = N/2, E[||x||_2] = sqrt(N/3), + // and hoyer_sparsity(x) is close to (1-sqrt(3)/2)/(1-1/sqrt(N)) + std::mt19937 gen(82); + std::uniform_real_distribution dis(0, 1); + v.resize(5000); + for (size_t i = 0; i < v.size(); ++i) { + v[i] = dis(gen); + } + hs = boost::math::tools::hoyer_sparsity(v); + Real expected = (1.0 - boost::math::constants::root_three()/2)/(1.0 - 1.0/sqrt(v.size())); + BOOST_TEST(abs(expected - hs) < 0.01); + + // Does it work with a forward list? + std::forward_list u1{1, 1, 1}; + hs = boost::math::tools::hoyer_sparsity(u1); + BOOST_TEST(abs(hs) < tol); + + // Does it work with a boost ublas vector? + boost::numeric::ublas::vector u2(3); + u2[0] = 1; + u2[1] = 1; + u2[2] = 1; + hs = boost::math::tools::hoyer_sparsity(u2); + BOOST_TEST(abs(hs) < tol); + +} + +template +void test_integer_hoyer_sparsity() +{ + using std::sqrt; + double tol = 5*std::numeric_limits::epsilon(); + std::vector v{1,0,0}; + double hs = boost::math::tools::hoyer_sparsity(v); + BOOST_TEST(abs(hs - 1) < tol); + + v[0] = 1; + v[1] = 1; + v[2] = 1; + hs = boost::math::tools::hoyer_sparsity(v); + BOOST_TEST(abs(hs) < tol); +} + + +template +void test_complex_hoyer_sparsity() +{ + typedef typename Complex::value_type Real; + using std::sqrt; + Real tol = 5*std::numeric_limits::epsilon(); + std::vector v{{0,1}, {0, 0}, {0,0}}; + Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); + BOOST_TEST(abs(hs - 1) < tol); + + hs = boost::math::tools::hoyer_sparsity(v); + BOOST_TEST(abs(hs - 1) < tol); + + // Does it work with constant iterators? 
+ hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); + BOOST_TEST(abs(hs - 1) < tol); + + // All are the same magnitude: + v[0] = {0, 1}; + v[1] = {1, 0}; + v[2] = {0,-1}; + hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); + BOOST_TEST(abs(hs) < tol); +} + + +template +void test_absolute_gini_coefficient() +{ + using boost::math::tools::absolute_gini_coefficient; + using boost::math::tools::sample_absolute_gini_coefficient; + Real tol = std::numeric_limits::epsilon(); + std::vector v{-1,0,0}; + Real gini = sample_absolute_gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini - 1) < tol); + + gini = absolute_gini_coefficient(v); + BOOST_TEST(abs(gini - Real(2)/Real(3)) < tol); + + v[0] = 1; + v[1] = -1; + v[2] = 1; + gini = absolute_gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); + gini = sample_absolute_gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); + + std::vector> w(128); + std::complex i{0,1}; + for(size_t k = 0; k < w.size(); ++k) + { + w[k] = exp(i*static_cast(k)/static_cast(w.size())); + } + gini = absolute_gini_coefficient(w.begin(), w.end()); + BOOST_TEST(abs(gini) < tol); + gini = sample_absolute_gini_coefficient(w.begin(), w.end()); + BOOST_TEST(abs(gini) < tol); + + // The population Gini index is invariant under "cloning": If w = v \oplus v, then G(w) = G(v). 
+ // We use the sample Gini index, so we need to rescale + std::vector u(1000); + std::mt19937 gen(35); + std::uniform_real_distribution dis(0, 50); + for (size_t i = 0; i < u.size()/2; ++i) + { + u[i] = dis(gen); + } + for (size_t i = 0; i < u.size()/2; ++i) + { + u[i + u.size()/2] = u[i]; + } + Real population_gini1 = absolute_gini_coefficient(u.begin(), u.begin() + u.size()/2); + Real population_gini2 = absolute_gini_coefficient(u.begin(), u.end()); + + BOOST_TEST(abs(population_gini1 - population_gini2) < 10*tol); + + // The Gini coefficient of a uniform distribution is (b-a)/(3*(b+a)), see https://en.wikipedia.org/wiki/Gini_coefficient + Real expected = (dis.b() - dis.a() )/(3*(dis.a() + dis.b())); + + BOOST_TEST(abs(expected - population_gini1) < 0.01); + + std::exponential_distribution exp_dis(1); + for (size_t i = 0; i < u.size(); ++i) + { + u[i] = exp_dis(gen); + } + population_gini2 = absolute_gini_coefficient(u); + + BOOST_TEST(abs(population_gini2 - 0.5) < 0.01); +} + + +template +void test_oracle_snr() +{ + using std::abs; + Real tol = 100*std::numeric_limits::epsilon(); + size_t length = 100; + std::vector signal(length, 1); + std::vector noisy_signal = signal; + + noisy_signal[0] += 1; + Real snr = boost::math::tools::oracle_snr(signal, noisy_signal); + Real snr_db = boost::math::tools::oracle_snr_db(signal, noisy_signal); + BOOST_TEST(abs(snr - length) < tol); + BOOST_TEST(abs(snr_db - 10*log10(length)) < tol); +} + +template +void test_integer_oracle_snr() +{ + double tol = std::numeric_limits::epsilon(); + size_t length = 100; + std::vector signal(length, 1); + std::vector noisy_signal = signal; + + noisy_signal[0] += 1; + double snr = boost::math::tools::oracle_snr(signal, noisy_signal); + double snr_db = boost::math::tools::oracle_snr_db(signal, noisy_signal); + BOOST_TEST(abs(snr - length) < tol); + BOOST_TEST(abs(snr_db - 10*log10(length)) < tol); +} + +template +void test_complex_oracle_snr() +{ + using Real = typename Complex::value_type; + 
using std::abs; + using std::log10; + Real tol = 100*std::numeric_limits::epsilon(); + size_t length = 100; + std::vector signal(length, {1,0}); + std::vector noisy_signal = signal; + + noisy_signal[0] += Complex(1,0); + Real snr = boost::math::tools::oracle_snr(signal, noisy_signal); + Real snr_db = boost::math::tools::oracle_snr_db(signal, noisy_signal); + BOOST_TEST(abs(snr - length) < tol); + BOOST_TEST(abs(snr_db - 10*log10(length)) < tol); +} + +template +void test_m2m4_snr_estimator() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector signal(5000, 1); + std::vector x(signal.size()); + std::mt19937 gen(18); + std::normal_distribution dis{0, 1.0}; + + for (size_t i = 0; i < x.size(); ++i) + { + signal[i] = 5*sin(100*6.28*i/x.size()); + x[i] = signal[i] + dis(gen); + } + + // Kurtosis of a sine wave is 1.5: + auto m2m4_db = boost::math::tools::m2m4_snr_estimator_db(x, 1.5); + auto oracle_snr_db = boost::math::tools::mean_invariant_oracle_snr_db(signal, x); + BOOST_TEST(abs(m2m4_db - oracle_snr_db) < 0.2); + + std::uniform_real_distribution uni_dis{-1,1}; + for (size_t i = 0; i < x.size(); ++i) + { + x[i] = signal[i] + uni_dis(gen); + } + + // Kurtosis of continuous uniform distribution over [-1,1] is 1.8: + m2m4_db = boost::math::tools::m2m4_snr_estimator_db(x, 1.5, 1.8); + oracle_snr_db = boost::math::tools::mean_invariant_oracle_snr_db(signal, x); + // The performance depends on the exact numbers generated by the distribution, but this isn't bad: + BOOST_TEST(abs(m2m4_db - oracle_snr_db) < 0.2); + + // The SNR estimator should be scale invariant. + // If x has snr y, then kx should have snr y. 
+ Real ka = 1.5; + Real kw = 1.8; + auto m2m4 = boost::math::tools::m2m4_snr_estimator(x.begin(), x.end(), ka, kw); + for(size_t i = 0; i < x.size(); ++i) + { + x[i] *= 4096; + } + auto m2m4_2 = boost::math::tools::m2m4_snr_estimator(x.begin(), x.end(), ka, kw); + BOOST_TEST(abs(m2m4 - m2m4_2) < tol); +} + +int main() +{ + test_absolute_gini_coefficient(); + test_absolute_gini_coefficient(); + test_absolute_gini_coefficient(); + test_absolute_gini_coefficient(); + + test_hoyer_sparsity(); + test_hoyer_sparsity(); + test_hoyer_sparsity(); + test_hoyer_sparsity(); + + test_integer_hoyer_sparsity(); + test_integer_hoyer_sparsity(); + + test_complex_hoyer_sparsity>(); + test_complex_hoyer_sparsity>(); + test_complex_hoyer_sparsity>(); + test_complex_hoyer_sparsity(); + + test_oracle_snr(); + test_oracle_snr(); + test_oracle_snr(); + test_oracle_snr(); + + test_integer_oracle_snr(); + test_integer_oracle_snr(); + + test_complex_oracle_snr>(); + test_complex_oracle_snr>(); + test_complex_oracle_snr>(); + test_complex_oracle_snr(); + + test_m2m4_snr_estimator(); + test_m2m4_snr_estimator(); + test_m2m4_snr_estimator(); + + return boost::report_errors(); +} diff --git a/test/test_roots.cpp b/test/test_roots.cpp index 3073397302..b26b91f634 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #define BOOST_CHECK_CLOSE_EX(a, b, prec, i) \ @@ -420,6 +421,91 @@ void test_daubechies_fails() } #endif +#if !defined(BOOST_NO_CXX17_IF_CONSTEXPR) +template +void test_solve_real_quadratic() +{ + Real tol = std::numeric_limits::epsilon(); + using boost::math::tools::quadratic_roots; + auto [x0, x1] = quadratic_roots(1, 0, -1); + BOOST_CHECK_CLOSE(x0, Real(-1), tol); + BOOST_CHECK_CLOSE(x1, Real(1), tol); + + auto p = quadratic_roots(7, 0, 0); + BOOST_CHECK_SMALL(p.first, tol); + BOOST_CHECK_SMALL(p.second, tol); + + // (x-7)^2 = x^2 - 14*x + 49: + p = quadratic_roots(1, -14, 49); + BOOST_CHECK_CLOSE(p.first, Real(7), tol); + 
BOOST_CHECK_CLOSE(p.second, Real(7), tol); + + // This test does not pass in multiprecision, + // due to the fact it does not have an fma: + if (std::is_floating_point::value) + { + // (x-1)(x-1-eps) = x^2 + (-eps - 2)x + (1)(1+eps) + Real eps = 2*std::numeric_limits::epsilon(); + p = quadratic_roots(256, 256*(-2 - eps), 256*(1 + eps)); + BOOST_CHECK_CLOSE(p.first, Real(1), tol); + BOOST_CHECK_CLOSE(p.second, Real(1) + eps, tol); + } + + if (std::is_same::value) + { + // Kahan's example: This is the test that demonstrates the necessity of the fma instruction. + // https://en.wikipedia.org/wiki/Loss_of_significance#Instability_of_the_quadratic_equation + p = quadratic_roots(94906265.625, -189812534, 94906268.375); + BOOST_CHECK_CLOSE_FRACTION(p.first, Real(1), tol); + BOOST_CHECK_CLOSE_FRACTION(p.second, 1.000000028975958, 4*tol); + } +} + +template +void test_solve_int_quadratic() +{ + double tol = std::numeric_limits::epsilon(); + using boost::math::tools::quadratic_roots; + auto [x0, x1] = quadratic_roots(1, 0, -1); + BOOST_CHECK_CLOSE(x0, double(-1), tol); + BOOST_CHECK_CLOSE(x1, double(1), tol); + + auto p = quadratic_roots(7, 0, 0); + BOOST_CHECK_SMALL(p.first, tol); + BOOST_CHECK_SMALL(p.second, tol); + + // (x-7)^2 = x^2 - 14*x + 49: + p = quadratic_roots(1, -14, 49); + BOOST_CHECK_CLOSE(p.first, double(7), tol); + BOOST_CHECK_CLOSE(p.second, double(7), tol); +} + +template +void test_solve_complex_quadratic() +{ + using Real = typename Complex::value_type; + Real tol = std::numeric_limits::epsilon(); + using boost::math::tools::quadratic_roots; + auto [x0, x1] = quadratic_roots({1,0}, {0,0}, {-1,0}); + BOOST_CHECK_CLOSE(x0.real(), Real(-1), tol); + BOOST_CHECK_CLOSE(x1.real(), Real(1), tol); + BOOST_CHECK_SMALL(x0.imag(), tol); + BOOST_CHECK_SMALL(x1.imag(), tol); + + auto p = quadratic_roots({7,0}, {0,0}, {0,0}); + BOOST_CHECK_SMALL(p.first.real(), tol); + BOOST_CHECK_SMALL(p.second.real(), tol); + + // (x-7)^2 = x^2 - 14*x + 49: + p = 
quadratic_roots({1,0}, {-14,0}, {49,0}); + BOOST_CHECK_CLOSE(p.first.real(), Real(7), tol); + BOOST_CHECK_CLOSE(p.second.real(), Real(7), tol); + +} + + +#endif + BOOST_AUTO_TEST_CASE( test_main ) { @@ -432,4 +518,14 @@ BOOST_AUTO_TEST_CASE( test_main ) test_daubechies_fails(); #endif +#if !defined(BOOST_NO_CXX17_IF_CONSTEXPR) + test_solve_real_quadratic(); + test_solve_real_quadratic(); + test_solve_real_quadratic(); + test_solve_real_quadratic(); + + test_solve_int_quadratic(); + test_solve_complex_quadratic>(); +#endif + } diff --git a/test/univariate_statistics_test.cpp b/test/univariate_statistics_test.cpp new file mode 100644 index 0000000000..4896f52cc4 --- /dev/null +++ b/test/univariate_statistics_test.cpp @@ -0,0 +1,768 @@ +/* + * (C) Copyright Nick Thompson 2018. + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using boost::multiprecision::cpp_bin_float_50; +using boost::multiprecision::cpp_complex_50; + +/* + * Test checklist: + * 1) Does it work with multiprecision? + * 2) Does it work with .cbegin()/.cend() if the data is not altered? + * 3) Does it work with ublas and std::array? (Checking Eigen and Armadillo will make the CI system really unhappy.) + * 4) Does it work with std::forward_list if a forward iterator is all that is required? + * 5) Does it work with complex data if complex data is sensible? + */ + + // To stress test, set global_seed = 0, global_size = huge. 
+ static const constexpr size_t global_seed = 0; + static const constexpr size_t global_size = 128; + +template +std::vector generate_random_vector(size_t size, size_t seed) +{ + if (seed == 0) + { + std::random_device rd; + seed = rd(); + } + std::vector v(size); + + std::mt19937 gen(seed); + + if constexpr (std::is_floating_point::value) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = dis(gen); + } + return v; + } + else if constexpr (std::is_integral::value) + { + // Rescaling by larger than 2 is UB! + std::uniform_int_distribution dis(std::numeric_limits::lowest()/2, std::numeric_limits::max()/2); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = dis(gen); + } + return v; + } + else if constexpr (boost::is_complex::value) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = {dis(gen), dis(gen)}; + } + return v; + } + else if constexpr (boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = {dis(gen), dis(gen)}; + } + return v; + } + else if constexpr (boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = dis(gen); + } + return v; + } + else + { + BOOST_ASSERT_MSG(false, "Could not identify type for random vector generation."); + return v; + } +} + + +template +void test_integer_mean() +{ + double tol = 100*std::numeric_limits::epsilon(); + std::vector v{1,2,3,4,5}; + double mu = boost::math::tools::mean(v); + BOOST_TEST(abs(mu - 3) < tol); + + // Work with std::array? 
+ std::array w{1,2,3,4,5}; + mu = boost::math::tools::mean(w); + BOOST_TEST(abs(mu - 3) < tol); + + v = generate_random_vector(global_size, global_seed); + Z scale = 2; + + double m1 = scale*boost::math::tools::mean(v); + for (auto & x : v) + { + x *= scale; + } + double m2 = boost::math::tools::mean(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); +} + +template +void test_mean() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,2,3,4,5}; + Real mu = boost::math::tools::mean(v.begin(), v.end()); + BOOST_TEST(abs(mu - 3) < tol); + + // Does range call work? + mu = boost::math::tools::mean(v); + BOOST_TEST(abs(mu - 3) < tol); + + // Can we successfully average only part of the vector? + mu = boost::math::tools::mean(v.begin(), v.begin() + 3); + BOOST_TEST(abs(mu - 2) < tol); + + // Does it work when we const qualify? + mu = boost::math::tools::mean(v.cbegin(), v.cend()); + BOOST_TEST(abs(mu - 3) < tol); + + // Does it work for std::array? + std::array u{1,2,3,4,5,6,7}; + mu = boost::math::tools::mean(u.begin(), u.end()); + BOOST_TEST(abs(mu - 4) < tol); + + // Does it work for a forward iterator? + std::forward_list l{1,2,3,4,5,6,7}; + mu = boost::math::tools::mean(l.begin(), l.end()); + BOOST_TEST(abs(mu - 4) < tol); + + // Does it work with ublas vectors? 
+ boost::numeric::ublas::vector w(7); + for (size_t i = 0; i < w.size(); ++i) + { + w[i] = i+1; + } + mu = boost::math::tools::mean(w.cbegin(), w.cend()); + BOOST_TEST(abs(mu - 4) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 2; + Real m1 = scale*boost::math::tools::mean(v); + for (auto & x : v) + { + x *= scale; + } + Real m2 = boost::math::tools::mean(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); +} + +template +void test_complex_mean() +{ + typedef typename Complex::value_type Real; + Real tol = std::numeric_limits::epsilon(); + std::vector v{{0,1},{0,2},{0,3},{0,4},{0,5}}; + auto mu = boost::math::tools::mean(v.begin(), v.end()); + BOOST_TEST(abs(mu.imag() - 3) < tol); + BOOST_TEST(abs(mu.real()) < tol); + + // Does range work? + mu = boost::math::tools::mean(v); + BOOST_TEST(abs(mu.imag() - 3) < tol); + BOOST_TEST(abs(mu.real()) < tol); +} + +template +void test_variance() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1,1,1}; + Real sigma_sq = boost::math::tools::variance(v.begin(), v.end()); + BOOST_TEST(abs(sigma_sq) < tol); + + sigma_sq = boost::math::tools::variance(v); + BOOST_TEST(abs(sigma_sq) < tol); + + Real s_sq = boost::math::tools::sample_variance(v); + BOOST_TEST(abs(s_sq) < tol); + + std::vector u{1}; + sigma_sq = boost::math::tools::variance(u.cbegin(), u.cend()); + BOOST_TEST(abs(sigma_sq) < tol); + + std::array w{0,1,0,1,0,1,0,1}; + sigma_sq = boost::math::tools::variance(w.begin(), w.end()); + BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol); + + sigma_sq = boost::math::tools::variance(w); + BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol); + + std::forward_list l{0,1,0,1,0,1,0,1}; + sigma_sq = boost::math::tools::variance(l.begin(), l.end()); + BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 2; + Real m1 = scale*scale*boost::math::tools::variance(v); + for (auto & x : v) + { + x *= scale; + } + Real m2 = 
boost::math::tools::variance(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); + + // Wikipedia example for a variance of N sided die: + // https://en.wikipedia.org/wiki/Variance + for (size_t j = 16; j < 2048; j *= 2) + { + v.resize(1024); + Real n = v.size(); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = i + 1; + } + + sigma_sq = boost::math::tools::variance(v); + + BOOST_TEST(abs(sigma_sq - (n*n-1)/Real(12)) <= tol*sigma_sq); + } + +} + +template +void test_integer_variance() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1,1,1}; + double sigma_sq = boost::math::tools::variance(v); + BOOST_TEST(abs(sigma_sq) < tol); + + std::forward_list l{0,1,0,1,0,1,0,1}; + sigma_sq = boost::math::tools::variance(l.begin(), l.end()); + BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol); + + v = generate_random_vector(global_size, global_seed); + Z scale = 2; + double m1 = scale*scale*boost::math::tools::variance(v); + for (auto & x : v) + { + x *= scale; + } + double m2 = boost::math::tools::variance(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); +} + +template +void test_integer_skewness() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + double skew = boost::math::tools::skewness(v); + BOOST_TEST(abs(skew) < tol); + + // Dataset is symmetric about the mean: + v = {1,2,3,4,5}; + skew = boost::math::tools::skewness(v); + BOOST_TEST(abs(skew) < tol); + + v = {0,0,0,0,5}; + // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2 + skew = boost::math::tools::skewness(v); + BOOST_TEST(abs(skew - 3.0/2.0) < tol); + + std::forward_list v2{0,0,0,0,5}; + skew = boost::math::tools::skewness(v); + BOOST_TEST(abs(skew - 3.0/2.0) < tol); + + + v = generate_random_vector(global_size, global_seed); + Z scale = 2; + double m1 = boost::math::tools::skewness(v); + for (auto & x : v) + { + x *= scale; + } + double m2 = boost::math::tools::skewness(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); + +} + +template +void test_skewness() +{ + Real tol = 
std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + Real skew = boost::math::tools::skewness(v); + BOOST_TEST(abs(skew) < tol); + + // Dataset is symmetric about the mean: + v = {1,2,3,4,5}; + skew = boost::math::tools::skewness(v); + BOOST_TEST(abs(skew) < tol); + + v = {0,0,0,0,5}; + // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2 + skew = boost::math::tools::skewness(v); + BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol); + + std::array w1{0,0,0,0,5}; + skew = boost::math::tools::skewness(w1); + BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol); + + std::forward_list w2{0,0,0,0,5}; + skew = boost::math::tools::skewness(w2); + BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 2; + Real m1 = boost::math::tools::skewness(v); + for (auto & x : v) + { + x *= scale; + } + Real m2 = boost::math::tools::skewness(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); +} + +template +void test_kurtosis() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + Real kurt = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(kurt) < tol); + + v = {1,2,3,4,5}; + // mu =1, sigma^2 = 2, kurtosis = 17/10 + kurt = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(kurt - Real(17)/Real(10)) < tol); + + v = {0,0,0,0,5}; + // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2, kurtosis = 13/4 + kurt = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol); + + std::array v1{0,0,0,0,5}; + kurt = boost::math::tools::kurtosis(v1); + BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol); + + std::forward_list v2{0,0,0,0,5}; + kurt = boost::math::tools::kurtosis(v2); + BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol); + + std::vector v3(10000); + std::mt19937 gen(42); + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v3.size(); ++i) { + v3[i] = dis(gen); + } + kurt = boost::math::tools::kurtosis(v3); + BOOST_TEST(abs(kurt - 3) < 0.1); + + std::uniform_real_distribution udis(-1, 3); + for 
(size_t i = 0; i < v3.size(); ++i) { + v3[i] = udis(gen); + } + auto excess_kurtosis = boost::math::tools::excess_kurtosis(v3); + BOOST_TEST(abs(excess_kurtosis + 6.0/5.0) < 0.2); + + v = generate_random_vector(global_size, global_seed); + Real scale = 2; + Real m1 = boost::math::tools::kurtosis(v); + for (auto & x : v) + { + x *= scale; + } + Real m2 = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); + + // This test only passes when there are a large number of samples. + // Otherwise, the distribution doesn't generate enough outliers to give, + // or generates too many, giving pretty wildly different values of kurtosis on different runs. + // However, by kicking up the samples to 1,000,000, I got very close to 6 for the excess kurtosis on every run. + // The CI system, however, would die on a million long doubles. + //v3.resize(1000000); + //std::exponential_distribution edis(0.1); + //for (size_t i = 0; i < v3.size(); ++i) { + // v3[i] = edis(gen); + //} + //excess_kurtosis = boost::math::tools::kurtosis(v3) - 3; + //BOOST_TEST(abs(excess_kurtosis - 6.0) < 0.2); +} + +template +void test_integer_kurtosis() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + double kurt = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(kurt) < tol); + + v = {1,2,3,4,5}; + // mu =1, sigma^2 = 2, kurtosis = 17/10 + kurt = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(kurt - 17.0/10.0) < tol); + + v = {0,0,0,0,5}; + // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2, kurtosis = 13/4 + kurt = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(kurt - 13.0/4.0) < tol); + + v = generate_random_vector(global_size, global_seed); + Z scale = 2; + double m1 = boost::math::tools::kurtosis(v); + for (auto & x : v) + { + x *= scale; + } + double m2 = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); +} + +template +void test_first_four_moments() +{ + Real tol = 10*std::numeric_limits::epsilon(); + std::vector v{1,1,1}; 
+ auto [M1_1, M2_1, M3_1, M4_1] = boost::math::tools::first_four_moments(v); + BOOST_TEST(abs(M1_1 - 1) < tol); + BOOST_TEST(abs(M2_1) < tol); + BOOST_TEST(abs(M3_1) < tol); + BOOST_TEST(abs(M4_1) < tol); + + v = {1, 2, 3, 4, 5}; + auto [M1_2, M2_2, M3_2, M4_2] = boost::math::tools::first_four_moments(v); + BOOST_TEST(abs(M1_2 - 3) < tol); + BOOST_TEST(abs(M2_2 - 2) < tol); + BOOST_TEST(abs(M3_2) < tol); + BOOST_TEST(abs(M4_2 - Real(34)/Real(5)) < tol); +} + +template +void test_median() +{ + std::mt19937 g(12); + std::vector v{1,2,3,4,5,6,7}; + + Real m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 4); + + std::shuffle(v.begin(), v.end(), g); + // Does range call work? + m = boost::math::tools::median(v); + BOOST_TEST_EQ(m, 4); + + v = {1,2,3,3,4,5}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 3); + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 3); + + v = {1}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); + + v = {1,1}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); + + v = {2,4}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 3); + + v = {1,1,1}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); + + v = {1,2,3}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 2); + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 2); + + // Does it work with std::array? + std::array w{1,2,3}; + m = boost::math::tools::median(w); + BOOST_TEST_EQ(m, 2); + + // Does it work with ublas? 
boost::numeric::ublas::vector w1(3); + w1[0] = 1; + w1[1] = 2; + w1[2] = 3; + m = boost::math::tools::median(w1); + BOOST_TEST_EQ(m, 2); +} + +template +void test_median_absolute_deviation() +{ + std::vector v{-1, 2, -3, 4, -5, 6, -7}; + + Real m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 4); + + std::mt19937 g(12); + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::median_absolute_deviation(v, 0); + BOOST_TEST_EQ(m, 4); + + v = {1, -2, -3, 3, -4, -5}; + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 3); + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 3); + + v = {-1}; + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 1); + + v = {-1, 1}; + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 1); + // The median is zero, so coincides with the default: + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); + + m = boost::math::tools::median_absolute_deviation(v); + BOOST_TEST_EQ(m, 1); + + + v = {2, -4}; + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 3); + + v = {1, -1, 1}; + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 1); + + v = {1, 2, -3}; + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 2); + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 2); + + std::array w{1, 2, -3}; + m = boost::math::tools::median_absolute_deviation(w, 0); + BOOST_TEST_EQ(m, 2); + + // boost.ublas vector?
+ boost::numeric::ublas::vector u(6); + u[0] = 1; + u[1] = 2; + u[2] = -3; + u[3] = 1; + u[4] = 2; + u[5] = -3; + m = boost::math::tools::median_absolute_deviation(u, 0); + BOOST_TEST_EQ(m, 2); +} + + +template +void test_sample_gini_coefficient() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,0,0}; + Real gini = boost::math::tools::sample_gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini - 1) < tol); + + gini = boost::math::tools::sample_gini_coefficient(v); + BOOST_TEST(abs(gini - 1) < tol); + + v[0] = 1; + v[1] = 1; + v[2] = 1; + gini = boost::math::tools::sample_gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); + + v[0] = 0; + v[1] = 0; + v[2] = 0; + gini = boost::math::tools::sample_gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); + + std::array w{0,0,0}; + gini = boost::math::tools::sample_gini_coefficient(w); + BOOST_TEST(abs(gini) < tol); +} + + +template +void test_gini_coefficient() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,0,0}; + Real gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + Real expected = Real(2)/Real(3); + BOOST_TEST(abs(gini - expected) < tol); + + gini = boost::math::tools::gini_coefficient(v); + BOOST_TEST(abs(gini - expected) < tol); + + v[0] = 1; + v[1] = 1; + v[2] = 1; + gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); + + v[0] = 0; + v[1] = 0; + v[2] = 0; + gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); + + std::array w{0,0,0}; + gini = boost::math::tools::gini_coefficient(w); + BOOST_TEST(abs(gini) < tol); + + boost::numeric::ublas::vector w1(3); + w1[0] = 1; + w1[1] = 1; + w1[2] = 1; + gini = boost::math::tools::gini_coefficient(w1); + BOOST_TEST(abs(gini) < tol); + + std::mt19937 gen(18); + // Gini coefficient for a uniform distribution is (b-a)/(3*(b+a)); + std::uniform_real_distribution dis(0, 3); + expected = (dis.b() - 
dis.a())/(3*(dis.b()+ dis.a())); + v.resize(1024); + for(size_t i = 0; i < v.size(); ++i) + { + v[i] = dis(gen); + } + gini = boost::math::tools::gini_coefficient(v); + BOOST_TEST(abs(gini - expected) < 0.01); + +} + +template +void test_integer_gini_coefficient() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,0,0}; + double gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + double expected = 2.0/3.0; + BOOST_TEST(abs(gini - expected) < tol); + + gini = boost::math::tools::gini_coefficient(v); + BOOST_TEST(abs(gini - expected) < tol); + + v[0] = 1; + v[1] = 1; + v[2] = 1; + gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); + + v[0] = 0; + v[1] = 0; + v[2] = 0; + gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); + + std::array w{0,0,0}; + gini = boost::math::tools::gini_coefficient(w); + BOOST_TEST(abs(gini) < tol); + + boost::numeric::ublas::vector w1(3); + w1[0] = 1; + w1[1] = 1; + w1[2] = 1; + gini = boost::math::tools::gini_coefficient(w1); + BOOST_TEST(abs(gini) < tol); +} + +int main() +{ + test_mean(); + test_mean(); + test_mean(); + test_mean(); + + test_integer_mean(); + test_integer_mean(); + test_integer_mean(); + + test_complex_mean>(); + test_complex_mean(); + + test_variance(); + test_variance(); + test_variance(); + test_variance(); + + test_integer_variance(); + test_integer_variance(); + + test_skewness(); + test_skewness(); + test_skewness(); + test_skewness(); + + test_integer_skewness(); + test_integer_skewness(); + + test_first_four_moments(); + test_first_four_moments(); + test_first_four_moments(); + test_first_four_moments(); + + test_kurtosis(); + test_kurtosis(); + test_kurtosis(); + // Kinda expensive: + //test_kurtosis(); + + test_integer_kurtosis(); + test_integer_kurtosis(); + + test_median(); + test_median(); + test_median(); + test_median(); + test_median(); + + test_median_absolute_deviation(); + 
test_median_absolute_deviation(); + test_median_absolute_deviation(); + test_median_absolute_deviation(); + test_median_absolute_deviation(); + + test_gini_coefficient(); + test_gini_coefficient(); + test_gini_coefficient(); + test_gini_coefficient(); + + test_integer_gini_coefficient(); + test_integer_gini_coefficient(); + + test_sample_gini_coefficient(); + test_sample_gini_coefficient(); + test_sample_gini_coefficient(); + test_sample_gini_coefficient(); + + return boost::report_errors(); +}