From aa43b5b52b143b97357e9a23722ccede33566929 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Thu, 6 Dec 2018 20:33:05 -0700 Subject: [PATCH 01/46] Vector functionals, first pass [CI SKIP] --- doc/math.qbk | 4 + doc/vector_functionals/vector_functionals.qbk | 203 +++++++++++++++ .../boost/math/tools/vector_functionals.hpp | 245 ++++++++++++++++++ test/vector_functionals_test.cpp | 217 ++++++++++++++++ 4 files changed, 669 insertions(+) create mode 100644 doc/vector_functionals/vector_functionals.qbk create mode 100644 include/boost/math/tools/vector_functionals.hpp create mode 100644 test/vector_functionals_test.cpp diff --git a/doc/math.qbk b/doc/math.qbk index 711e88f8c6..90585886f3 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -552,6 +552,10 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22. [include distributions/dist_reference.qbk] [/includes all individual distribution.qbk files] [endmathpart] [/section:dist Statistical Distributions and Functions] +[mathpart vector_functionals Vector Functionals] +[include vector_functionals/vector_functionals.qbk] +[endmathpart] [/section:vector_functionals Vector Functionals] + [mathpart special Special Functions] [include sf/number_series.qbk] diff --git a/doc/vector_functionals/vector_functionals.qbk b/doc/vector_functionals/vector_functionals.qbk new file mode 100644 index 0000000000..046a1bcd3e --- /dev/null +++ b/doc/vector_functionals/vector_functionals.qbk @@ -0,0 +1,203 @@ +[/ + Copyright 2017 Nick Thompson + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt). +] + +[section:vector_functionals Vector Functionals] + +[heading Synopsis] + +`` +#include + +namespace boost{ namespace math{ namespace tools { + + template + auto mean(ForwardIterator first, ForwardIterator last); + + template + auto mean_and_variance(ForwardIterator first, ForwardIterator last); + + template + auto median(ForwardIterator first, ForwardIterator last); + + template + auto absolute_median(ForwardIterator first, ForwardIterator last); + + template + auto shannon_entropy(ForwardIterator first, ForwardIterator last); + + template + auto normalized_shannon_entropy(ForwardIterator first, ForwardIterator last); + + template + auto gini_coefficient(ForwardIterator first, ForwardIterator last); + + template + auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last); + + template + auto pq_mean(ForwardIterator first, ForwardIterator last, p, q); + + template + auto lp_norm(ForwardIterator first, ForwardIterator last, p); + + template + auto l0_norm(ForwardIterator first, ForwardIterator last); + + template + auto l1_norm(ForwardIterator first, ForwardIterator last); + + template + auto l2_norm(ForwardIterator first, ForwardIterator last); + + template + auto sup_norm(ForwardIterator first, ForwardIterator last); + + template + auto lp_distance(RandomAccessContainer const & u, RandomAccessContainer const & v, p); + + template + auto l1_distance(RandomAccessContainer const & u, RandomAccessContainer const & v); + + template + auto l2_distance(RandomAccessContainer const & u, RandomAccessContainer const & v); + + template + auto sup_distance(RandomAccessContainer const & u, RandomAccessContainer const & v); + + template + auto total_variation(ForwardIterator first, ForwardIterator last); + + template + auto lanczos_noisy_derivative(RandomAccessContainer const & v, time_step, time); + + template + auto kurtosis(ForwardIterator 
first, ForwardIterator last); + + template + auto skewness(ForwardIterator first, ForwardIterator last); + + template + auto covariance(RandomAccessContainer const & u, RandomAccessContainer const & v); + + template + auto simpsons_rule_quadrature(ForwardIterator first, ForwardIterator last); + + template + auto simpsons_three_eighths_quadrature(ForwardIterator first, ForwardIterator last); + + template + auto booles_rule_quadrature(ForwardIterator first, ForwardIterator last); + + template + auto inner_product(RandomAccessContainer const & u, RandomAccessContainer const & v); + + +}}} +`` + +[heading Description] + +The file `boost/math/tools/vector_functionals.hpp` is a set of facilities for computing scalar values from vectors. +We use the word "vector functional" in the [@https://ncatlab.org/nlab/show/nonlinear+functional mathematical sense], indicating a map \u2113:\u211D[super n] \u2192 \u211D, +and occasionally maps from \u2102[super n] \u2192 \u211D and \u2102[super n] \u2192 \u2102. +The set of maps provided herein attempt to cover the most commonly encountered functionals from statistics, numerical analysis, and signal processing. + +Many of these functionals have trivial naive implementations, but experienced programmers will recognize that even trivial algorithms are easy to screw up, and that numerical instabilities often lurk in corner cases. +We have attempted to do our "due diligence" to root out these problems-scouring the literature for numerically stable algorithms for even the simplest of functionals. + +/Nota bene/: Some similar functionality is provided in [@https://www.boost.org/doc/libs/1_68_0/doc/html/accumulators/user_s_guide.html Boost Accumulators Framework]. +These accumulators should be used in real-time applications; `vector_functionals.hpp` should be used when CPU vectorization is needed. +As a reminder, remember that to actually /get/ vectorization, compile with `-march=native -O3` flags. + +We now describe each functional in detail. + +Compute the mean of a container: + + std::vector v{1,2,3,4,5}; + double mu = mean(v.begin(), v.end()); + +The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6a]. +The only requirement on the input is that it must be forward iterable, so you can use Eigen vectors, ublas vectors, Armadillo vectors, or a `std::forward_list` to hold your data. + + +Compute the mean and sample variance: + + std::vector v{1,2,3,4,5}; + auto [mu, s] = mean_and_sample_variance(v.begin(), v.end()); + +The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6b]. +Note that we do not provide computation of sample variance alone; +we are unaware of any one-pass, numerically stable computation of sample variance which does not require simultaneous computation of the mean. +If the mean is not required, simply ignore it. +The input datatype must be forward iterable and the range `[first, last)` must contain at least two elements. + +Compute the median of a dataset: + + std::vector v{1,2,3,4,5}; + double m = boost::math::tools::median(v.begin(), v.end()); + +/Nota bene: The input vector is modified./ +The calculation of the median is a thin wrapper around the C++11 [@https://en.cppreference.com/w/cpp/algorithm/nth_element nth-element]. +Therefore, all requirements of `nth_element` are inherited by the median calculation. 
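
To make those inherited requirements concrete, here is a minimal sketch of the odd/even handling such a wrapper performs (illustrative only, not the library's implementation; needs `<vector>` and `<algorithm>`):

    std::vector<double> data{5, 1, 4, 2, 3};
    size_t n = data.size();
    double m;
    if (n & 1)
    {
        auto middle = data.begin() + n/2;
        std::nth_element(data.begin(), middle, data.end());
        m = *middle;
    }
    else
    {
        auto middle = data.begin() + n/2 - 1;
        std::nth_element(data.begin(), middle, data.end());
        // the next order statistic is the smallest element of the upper partition:
        m = (*middle + *std::min_element(middle + 1, data.end()))/2;
    }
    // m = 3; note that data has been reordered, just as with the library call.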
+ + +Compute the sup norm of a dataset: + + std::vector v{-3, 2, 1}; + double sup = boost::math::tools::sup_norm(v.begin(), v.end()); + // sup = 3 + + std::vector> v{{0, -8}, {1,1}, {-3,2}}; + double sup = boost::math::tools::sup_norm(v.begin(), v.end()); + // sup = 8 + +Note how the calculation of \u2113[super p] norms can be performed in both real and complex arithmetic. + +Compute the Gini coefficient of a dataset: + + std::vector v{1,0,0,0}; + double gini = gini_coefficient(v.begin(), v.end()); + // gini = 1, as v[0] holds all the "wealth" + std::vector w{1,1,1,1}; + gini = gini_coefficient(w.begin(), w.end()); + // gini = 0, as all elements are now equal. + +/Nota bene: The input data is altered-in particular, it is sorted./ + +/Nota bene:/ Different authors use different conventions regarding the overall scale of the Gini coefficient. +We have chosen to follow [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard's definition], which [@https://en.wikipedia.org/wiki/Gini_coefficient Wikipedia] calls a "consistent estimator" of the population Gini coefficient. +Hurley and Rickard's definition places the Gini coefficient in the range [0,1]; Wikipedia's population Gini coefficient is in the range [0, 1 - 1/N]. + +The Gini coefficient, first used to measure wealth inequality, is also one of the best measures of the sparsity of an expansion in a basis. +A sparse expansion has most of its norm concentrated in just a few coefficients, making the connection with wealth inequality obvious. +However, for measuring sparsity, the phase of the numbers is irrelevant, so `absolute_gini_coefficient` should be used instead: + + std::vector> v{{0,1}, {0,0}, {0,0}, {0,0}}; + double abs_gini = absolute_gini_coefficient(v.begin(), v.end()); + // abs_gini = 1 + std::vector> w{{0,1}, {1,0}, {0,-1}, {-1,0}}; + double abs_gini = absolute_gini_coefficient(w.begin(), w.end()); + // abs_gini = 0 + + + +[heading Examples] + +[heading Performance] + +[heading Caveats] + +[heading References] + +* Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002. +* Mallat, Stephane. ['A wavelet tour of signal processing: the sparse way.] Academic press, 2008. +* Hurley, Niall, and Scott Rickard. ['Comparing measures of sparsity.] IEEE Transactions on Information Theory 55.10 (2009): 4723-4741. + + +[endsect] +[/section:vector_functionals Vector Functionals] diff --git a/include/boost/math/tools/vector_functionals.hpp b/include/boost/math/tools/vector_functionals.hpp new file mode 100644 index 0000000000..8e77e97f9b --- /dev/null +++ b/include/boost/math/tools/vector_functionals.hpp @@ -0,0 +1,245 @@ +// (C) Copyright Nick Thompson 2018. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_VECTOR_FUNCTIONALS_HPP +#define BOOST_MATH_TOOLS_VECTOR_FUNCTIONALS_HPP + +#include +#include +#include +#include +#include +/* + * A set of tools for computing scalar quantities associated with lists of numbers. 
+ */ + + +namespace boost{ namespace math{ namespace tools { + +template +auto +mean(ForwardIterator first, ForwardIterator last) +{ + typedef typename std::remove_const())>::type>::type Real; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean."); + Real mu = 0; + Real i = 1; + for(auto it = first; it != last; ++it) { + mu = mu + (*it - mu)/i; + i += 1; + } + return mu; +} + +template +auto +mean_and_population_variance(ForwardIterator first, ForwardIterator last) +{ + typedef typename std::remove_const())>::type>::type Real; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance."); + // Higham, Accuracy and Stability, equation 1.6a and 1.6b: + Real M = *first; + Real Q = 0; + Real k = 2; + for (auto it = first + 1; it != last; ++it) + { + Real tmp = *it - M; + Q = Q + ((k-1)*tmp*tmp)/k; + M = M + tmp/k; + k += 1; + } + + return std::make_pair(M, Q/(k-1)); +} + +template +auto median(ForwardIterator first, ForwardIterator last) +{ + typedef typename std::remove_const())>::type>::type Real; + Real m = 0; + return m; +} + +// Mallat, "A Wavelet Tour of Signal Processing", equation 2.60: +template +auto total_variation(ForwardIterator first, ForwardIterator last) +{ + typedef typename std::remove_const())>::type>::type Real; + BOOST_ASSERT_MSG(first != last && std::next(first) != last, "At least two samples are required to compute the total variation."); + Real tot = 0; + auto it = first; + Real tmp = *it; + while (++it != last) + { + tot += abs(*it - tmp); + tmp = *it; + } + return tot; +} + +// Mallat, equation 10.4 uses the base-2 logarithm. +template +auto shannon_entropy(ForwardIterator first, ForwardIterator last) +{ + typedef typename std::remove_const())>::type>::type Real; + using std::log2; + Real entropy = 0; + for (auto it = first; it != last; ++it) + { + Real tmp = *it; + if (tmp != 0) + { + entropy += tmp*log2(tmp); + } + } + return -entropy; +} + +template +auto sup_norm(ForwardIterator first, ForwardIterator last) +{ + BOOST_ASSERT_MSG(first != last, "At least one value is required to compute the sup norm."); + typedef typename std::remove_const())>::type>::type RealOrComplex; + using std::abs; + if constexpr (boost::is_complex::value) + { + auto it = max_element(first, last, [](RealOrComplex a, RealOrComplex b) { return abs(b) > abs(a); }); + return abs(*it); + } + else + { + auto pair = minmax_element(first, last); + if (abs(*pair.first) > abs(*pair.second)) + { + return abs(*pair.first); + } + else + { + return abs(*pair.second); + } + } +} + +template +size_t l0_norm(ForwardIterator first, ForwardIterator last) +{ + typedef typename std::remove_const())>::type>::type RealOrComplex; + size_t count = 0; + for (auto it = first; it != last; ++it) + { + if (*it != RealOrComplex(0)) + { + ++count; + } + } + return count; +} + +template +auto lp_norm(ForwardIterator first, ForwardIterator last, typename std::remove_const())>::type>::type p) +{ + using std::pow; + using std::is_floating_point; + using std::isfinite; + typedef typename std::remove_const())>::type>::type RealOrComplex; + if constexpr (boost::is_complex::value) + { + BOOST_ASSERT_MSG(p.real() >= 0, "For p < 0, the lp norm is not a norm."); + BOOST_ASSERT_MSG(p.imag() == 0, "For imaginary p, the lp norm is not a norm."); + using std::norm; + decltype(p.real()) lp = 0; + for (auto it = first; it != last; ++it) + { + lp += pow(norm(*it), p.real()/2); + } + + auto result = pow(lp, 1/p.real()); + if (!isfinite(result)) + { + auto a = 
boost::math::tools::sup_norm(first, last); + decltype(p.real()) lp = 0; + for (auto it = first; it != last; ++it) + { + lp += pow(abs(*it)/a, p.real()); + } + result = a*pow(lp, 1/p.real()); + } + return result; + } + else if constexpr (is_floating_point::value) + { + BOOST_ASSERT_MSG(p >= 0, "For p < 0, the lp norm is not a norm"); + RealOrComplex lp = 0; + for (auto it = first; it != last; ++it) + { + lp += pow(abs(*it), p); + } + RealOrComplex result = pow(lp, 1/p); + if (!isfinite(result)) + { + RealOrComplex a = boost::math::tools::sup_norm(first, last); + lp = 0; + for (auto it = first; it != last; ++it) + { + lp += pow(abs(*it)/a, p); + } + result = a*pow(lp, 1/p); + } + return result; + } + else + { + BOOST_ASSERT_MSG(false, "Unable to determine if the input type is real or complex."); + } +} + +template +auto gini_coefficient(ForwardIterator first, ForwardIterator last) +{ + typedef typename std::remove_const())>::type>::type Real; + BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); + + std::sort(first, last); + + Real i = 1; + Real num = 0; + Real denom = 0; + for (auto it = first; it != last; ++it) { + num += *it*i; + denom += *it; + ++i; + } + return ((2*num)/denom - i)/(i-2); +} + + +template +auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) +{ + typedef typename std::remove_const())>::type>::type RealOrComplex; + BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); + + std::sort(first, last, [](RealOrComplex a, RealOrComplex b) { return abs(b) > abs(a); }); + + + decltype(abs(*first)) i = 1; + decltype(abs(*first)) num = 0; + decltype(abs(*first)) denom = 0; + std::cout << "{"; + for (auto it = first; it != last; ++it) { + std::cout << abs(*it) << ", "; + decltype(abs(*first)) tmp = abs(*it); + num += tmp*i; + denom += tmp; + ++i; + } + std::cout << "}\n"; + return ((2*num)/denom - i)/(i-2); +} + + + +}}} +#endif diff --git a/test/vector_functionals_test.cpp b/test/vector_functionals_test.cpp new file mode 100644 index 0000000000..c82037932a --- /dev/null +++ b/test/vector_functionals_test.cpp @@ -0,0 +1,217 @@ +/* + * (C) Copyright Nick Thompson 2018. + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +using boost::multiprecision::cpp_bin_float_50; +using boost::multiprecision::cpp_complex_50; + +template +void test_mean() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,2,3,4,5}; + Real mu = boost::math::tools::mean(v.begin(), v.end()); + BOOST_TEST(abs(mu - 3) < tol); + + // Can we successfully average only part of the vector? + mu = boost::math::tools::mean(v.begin(), v.begin() + 3); + BOOST_TEST(abs(mu - 2) < tol); + + // Does it work when we const qualify? + mu = boost::math::tools::mean(v.cbegin(), v.cend()); + BOOST_TEST(abs(mu - 3) < tol); + + // Does it work for std::array? + std::array u{1,2,3,4,5,6,7}; + mu = boost::math::tools::mean(u.begin(), u.end()); + BOOST_TEST(abs(mu - 4) < tol); + + // Does it work for a forward iterator? + std::forward_list l{1,2,3,4,5,6,7}; + mu = boost::math::tools::mean(l.begin(), l.end()); + BOOST_TEST(abs(mu - 4) < tol); + + // Does it work with ublas vectors? 
+ boost::numeric::ublas::vector w(7); + for (size_t i = 0; i < w.size(); ++i) + { + w[i] = i+1; + } + mu = boost::math::tools::mean(w.cbegin(), w.cend()); + BOOST_TEST(abs(mu - 4) < tol); + +} + +template +void test_mean_and_population_variance() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1,1,1}; + auto [mu, sigma_sq] = boost::math::tools::mean_and_population_variance(v.begin(), v.end()); + BOOST_TEST(abs(mu - 1) < tol); + BOOST_TEST(abs(sigma_sq) < tol); + + std::vector u{1}; + auto [mu1, sigma1_sq] = boost::math::tools::mean_and_population_variance(u.begin(), u.end()); + BOOST_TEST(abs(mu1 - 1) < tol); + BOOST_TEST(abs(sigma1_sq) < tol); + + std::vector w{0,1,0,1,0,1,0,1}; + auto [mu2, sigma2_sq] = boost::math::tools::mean_and_population_variance(w.begin(), w.end()); + BOOST_TEST(abs(mu2 - 1.0/2.0) < tol); + BOOST_TEST(abs(sigma2_sq - 1.0/4.0) < tol); +} + +template +void test_lp() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector> v{{1,0}, {0,0}, {0,0}}; + Real l3 = boost::math::tools::lp_norm(v.begin(), v.end(), 3); + BOOST_TEST(abs(l3 - 1) < tol); + + std::vector u{1,0,0}; + l3 = boost::math::tools::lp_norm(u.begin(), u.end(), 3); + BOOST_TEST(abs(l3 - 1) < tol); +} + +template +void test_total_variation() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1}; + Real tv = boost::math::tools::total_variation(v.begin(), v.end()); + BOOST_TEST(tv >= 0 && abs(tv) < tol); + + v[1] = 2; + tv = boost::math::tools::total_variation(v.begin(), v.end()); + BOOST_TEST(abs(tv - 1) < tol); + + v.resize(50); + for (size_t i = 0; i < v.size(); ++i) { + v[i] = i; + } + + tv = boost::math::tools::total_variation(v.begin(), v.end()); + BOOST_TEST(abs(tv - (v.size() -1)) < tol); + + for (size_t i = 0; i < v.size(); ++i) { + v[i] = i*i; + } + + tv = boost::math::tools::total_variation(v.begin(), v.end()); + BOOST_TEST(abs(tv - (v.size() -1)*(v.size()-1)) < tol); +} + +template +void test_sup_norm() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{-2,1,0}; + Real s = boost::math::tools::sup_norm(v.begin(), v.end()); + BOOST_TEST(abs(s - 2) < tol); + + std::vector> w{{0,-8}, {1,1}, {3,2}}; + s = boost::math::tools::sup_norm(w.begin(), w.end()); + BOOST_TEST(abs(s-8) < tol); +} + +template +void test_gini_coefficient() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,0,0}; + Real gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini - 1) < tol); + + v[0] = 1; + v[1] = 1; + v[2] = 1; + gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); +} + +template +void test_absolute_gini_coefficient() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{-1,0,0}; + Real gini = boost::math::tools::absolute_gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini - 1) < tol); + + v[0] = 1; + v[1] = -1; + v[2] = 1; + gini = boost::math::tools::absolute_gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); + + std::vector> w(128); + std::complex i{0,1}; + for(size_t k = 0; k < w.size(); ++k) + { + w[k] = exp(i*static_cast(k)/static_cast(w.size())); + } + gini = boost::math::tools::absolute_gini_coefficient(w.begin(), w.end()); + BOOST_TEST(abs(gini) < tol); + +} + +template +void test_l0_norm() +{ + std::vector v{0,0,1}; + size_t count = boost::math::tools::l0_norm(v.begin(), v.end()); + BOOST_TEST_EQ(count, 1); +} + +int main() +{ + test_mean(); + test_mean(); + test_mean(); + test_mean(); + + test_mean_and_population_variance(); + 
test_mean_and_population_variance(); + test_mean_and_population_variance(); + test_mean_and_population_variance(); + + test_lp(); + test_lp(); + test_lp(); + + test_total_variation(); + test_total_variation(); + test_total_variation(); + test_total_variation(); + + test_sup_norm(); + test_sup_norm(); + test_sup_norm(); + + test_gini_coefficient(); + test_gini_coefficient(); + test_gini_coefficient(); + + test_absolute_gini_coefficient(); + test_absolute_gini_coefficient(); + test_absolute_gini_coefficient(); + + test_l0_norm(); + test_l0_norm(); + test_l0_norm(); + + return boost::report_errors(); +} From a9985e3e1ca593a8c2d0baa88dea648d884440de Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Fri, 7 Dec 2018 09:02:25 -0700 Subject: [PATCH 02/46] Hoyer sparsity [CI SKIP] --- doc/vector_functionals/vector_functionals.qbk | 53 ++++++++++++++++--- .../boost/math/tools/vector_functionals.hpp | 42 +++++++++++++-- test/vector_functionals_test.cpp | 37 +++++++++++++ 3 files changed, 120 insertions(+), 12 deletions(-) diff --git a/doc/vector_functionals/vector_functionals.qbk b/doc/vector_functionals/vector_functionals.qbk index 046a1bcd3e..03a5724e20 100644 --- a/doc/vector_functionals/vector_functionals.qbk +++ b/doc/vector_functionals/vector_functionals.qbk @@ -116,6 +116,8 @@ As a reminder, remember that to actually /get/ vectorization, compile with `-mar We now describe each functional in detail. +[heading Mean] + Compute the mean of a container: std::vector v{1,2,3,4,5}; @@ -125,6 +127,8 @@ The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6a The only requirement on the input is that it must be forward iterable, so you can use Eigen vectors, ublas vectors, Armadillo vectors, or a `std::forward_list` to hold your data. +[heading Mean and Sample variance] + Compute the mean and sample variance: std::vector v{1,2,3,4,5}; @@ -132,10 +136,12 @@ Compute the mean and sample variance: The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6b]. Note that we do not provide computation of sample variance alone; -we are unaware of any one-pass, numerically stable computation of sample variance which does not require simultaneous computation of the mean. +we are unaware of any one-pass, numerically stable computation of sample variance which does not simultaneously generate the mean. If the mean is not required, simply ignore it. The input datatype must be forward iterable and the range `[first, last)` must contain at least two elements. +[heading Median] + Compute the median of a dataset: std::vector v{1,2,3,4,5}; @@ -145,7 +151,7 @@ Compute the median of a dataset: The calculation of the median is a thin wrapper around the C++11 [@https://en.cppreference.com/w/cpp/algorithm/nth_element nth-element]. Therefore, all requirements of `nth_element` are inherited by the median calculation. - +[heading Sup norm] Compute the sup norm of a dataset: std::vector v{-3, 2, 1}; @@ -158,6 +164,8 @@ Compute the sup norm of a dataset: Note how the calculation of \u2113[super p] norms can be performed in both real and complex arithmetic. +[heading Gini Coefficient] + Compute the Gini coefficient of a dataset: std::vector v{1,0,0,0}; @@ -170,8 +178,13 @@ Compute the Gini coefficient of a dataset: /Nota bene: The input data is altered-in particular, it is sorted./ /Nota bene:/ Different authors use different conventions regarding the overall scale of the Gini coefficient. 
-We have chosen to follow [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard's definition], which [@https://en.wikipedia.org/wiki/Gini_coefficient Wikipedia] calls a "consistent estimator" of the population Gini coefficient. +We have chosen to follow [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard's definition], which [@https://en.wikipedia.org/wiki/Gini_coefficient Wikipedia] calls a "sample Gini coefficient". Hurley and Rickard's definition places the Gini coefficient in the range [0,1]; Wikipedia's population Gini coefficient is in the range [0, 1 - 1/N]. +If you wish to convert the Boost Gini coefficient to the population Gini coefficient, multiply by (/n/-1)/ /n/. + +/Nota bene:/ There is essentially no reason to pass negative values to the Gini coefficient function. +However, since a single use case (measuring wealth inequality when some people have negative wealth) exists, we do not throw an exception when negative values are encountered. +You should have /very/ good cause to pass negative values to the Gini coefficient calculator. The Gini coefficient, first used to measure wealth inequality, is also one of the best measures of the sparsity of an expansion in a basis. A sparse expansion has most of its norm concentrated in just a few coefficients, making the connection with wealth inequality obvious. @@ -179,18 +192,42 @@ However, for measuring sparsity, the phase of the numbers is irrelevant, so `abs std::vector> v{{0,1}, {0,0}, {0,0}, {0,0}}; double abs_gini = absolute_gini_coefficient(v.begin(), v.end()); - // abs_gini = 1 + // now abs_gini = 1 + std::vector> w{{0,1}, {1,0}, {0,-1}, {-1,0}}; double abs_gini = absolute_gini_coefficient(w.begin(), w.end()); - // abs_gini = 0 + // now abs_gini = 0 + + std::vector u{-1, 1, -1}; + double abs_gini = absolute_gini_coefficient(u.begin(), u.end()); + // now abs_gini = 0 + +Again, Wikipedia denotes our scaling as a "sample Gini coefficient". +We chose this scaling because it always returns unity for a vector which has only one nonzero coefficient. + +If sorting the input data is to much expense for a sparsity measure (is it going to be perfect anyway?), +consider using `hoyer_sparsity`. + +[heading Hoyer Sparsity] + +The Hoyer sparsity measure uses a normalized ratio of the \u2113[super 1] and \u2113[super 2] norms. +As the name suggests, it is used to measure sparsity in an expansion in some basis. + +The Hoyer sparsity computes ([radic]/N/ - \u2113[super 1](v)/\u2113[super 2](v))/([radic]N -1). + +Usage: + + std::vector v{1,0,0}; + Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); + // hs = 1 + +[heading \u2113[super /p/] norm] -[heading Examples] -[heading Performance] +[heading \u2113[super 0] norm] -[heading Caveats] [heading References] diff --git a/include/boost/math/tools/vector_functionals.hpp b/include/boost/math/tools/vector_functionals.hpp index 8e77e97f9b..2cc0526802 100644 --- a/include/boost/math/tools/vector_functionals.hpp +++ b/include/boost/math/tools/vector_functionals.hpp @@ -211,6 +211,13 @@ auto gini_coefficient(ForwardIterator first, ForwardIterator last) denom += *it; ++i; } + + // If the l1 norm is zero, all elements are zero, so every element is the same. 
+ if (denom == 0) + { + return Real(0); + } + return ((2*num)/denom - i)/(i-2); } @@ -227,19 +234,46 @@ auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) decltype(abs(*first)) i = 1; decltype(abs(*first)) num = 0; decltype(abs(*first)) denom = 0; - std::cout << "{"; - for (auto it = first; it != last; ++it) { - std::cout << abs(*it) << ", "; + for (auto it = first; it != last; ++it) + { decltype(abs(*first)) tmp = abs(*it); num += tmp*i; denom += tmp; ++i; } - std::cout << "}\n"; + + // If the l1 norm is zero, all elements are zero, so every element is the same. + if (denom == 0) + { + decltype(abs(*first)) zero = 0; + return zero; + } return ((2*num)/denom - i)/(i-2); } +// The Hoyer sparsity measure is defined in: +// https://arxiv.org/pdf/0811.4706.pdf +template +auto hoyer_sparsity(ForwardIterator first, ForwardIterator last) +{ + using std::abs; + using std::sqrt; + typedef typename std::remove_const())>::type>::type RealOrComplex; + BOOST_ASSERT_MSG(first != last, "Computation of the Hoyer sparsity requires at least one sample."); + decltype(abs(*first)) l1 = 0; + decltype(abs(*first)) l2 = 0; + decltype(abs(*first)) n = 0; + for (auto it = first; it != last; ++it) + { + decltype(abs(*first)) tmp = abs(*it); + l1 += tmp; + l2 += tmp*tmp; + n += 1; + } + decltype(abs(*first)) rootn = sqrt(n); + return (rootn - l1/sqrt(l2) )/ (rootn - 1); +} }}} #endif diff --git a/test/vector_functionals_test.cpp b/test/vector_functionals_test.cpp index c82037932a..c535ba7026 100644 --- a/test/vector_functionals_test.cpp +++ b/test/vector_functionals_test.cpp @@ -141,6 +141,33 @@ void test_gini_coefficient() v[2] = 1; gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini) < tol); + + v[0] = 0; + v[1] = 0; + v[2] = 0; + gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); +} + +template +void test_hoyer_sparsity() +{ + using std::sqrt; + Real tol = 5*std::numeric_limits::epsilon(); + std::vector v{1,0,0}; + Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); + BOOST_TEST(abs(hs - 1) < tol); + + // Does it work with constant iterators? + hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); + BOOST_TEST(abs(hs - 1) < tol); + + v[0] = 1; + v[1] = 1; + v[2] = 1; + hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); + BOOST_TEST(abs(hs) < tol); + } template @@ -166,6 +193,7 @@ void test_absolute_gini_coefficient() gini = boost::math::tools::absolute_gini_coefficient(w.begin(), w.end()); BOOST_TEST(abs(gini) < tol); + // The Gini index is invariant under "cloning": If w = v \oplus v, then G(w) = G(v). } template @@ -174,6 +202,11 @@ void test_l0_norm() std::vector v{0,0,1}; size_t count = boost::math::tools::l0_norm(v.begin(), v.end()); BOOST_TEST_EQ(count, 1); + + // Compiles with cbegin()/cend()? + count = boost::math::tools::l0_norm(v.cbegin(), v.cend()); + BOOST_TEST_EQ(count, 1); + } int main() @@ -209,6 +242,10 @@ int main() test_absolute_gini_coefficient(); test_absolute_gini_coefficient(); + test_hoyer_sparsity(); + test_hoyer_sparsity(); + test_hoyer_sparsity(); + test_l0_norm(); test_l0_norm(); test_l0_norm(); From 60f653a1fe9b0def1f66c7cb06372b035732464a Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Fri, 7 Dec 2018 15:05:14 -0700 Subject: [PATCH 03/46] L1 and L2 norms. 
[CI SKIP] --- doc/vector_functionals/vector_functionals.qbk | 167 +++++++------ .../boost/math/tools/vector_functionals.hpp | 130 ++++++++-- test/vector_functionals_test.cpp | 231 ++++++++++++++++-- 3 files changed, 418 insertions(+), 110 deletions(-) diff --git a/doc/vector_functionals/vector_functionals.qbk b/doc/vector_functionals/vector_functionals.qbk index 03a5724e20..60031149d9 100644 --- a/doc/vector_functionals/vector_functionals.qbk +++ b/doc/vector_functionals/vector_functionals.qbk @@ -30,9 +30,6 @@ namespace boost{ namespace math{ namespace tools { template auto shannon_entropy(ForwardIterator first, ForwardIterator last); - template - auto normalized_shannon_entropy(ForwardIterator first, ForwardIterator last); - template auto gini_coefficient(ForwardIterator first, ForwardIterator last); @@ -40,13 +37,13 @@ namespace boost{ namespace math{ namespace tools { auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last); template - auto pq_mean(ForwardIterator first, ForwardIterator last, p, q); + auto hoyer_sparsity(ForwardIterator first, ForwardIterator last); template auto lp_norm(ForwardIterator first, ForwardIterator last, p); template - auto l0_norm(ForwardIterator first, ForwardIterator last); + auto l0_pseudo_norm(ForwardIterator first, ForwardIterator last); template auto l1_norm(ForwardIterator first, ForwardIterator last); @@ -57,46 +54,9 @@ namespace boost{ namespace math{ namespace tools { template auto sup_norm(ForwardIterator first, ForwardIterator last); - template - auto lp_distance(RandomAccessContainer const & u, RandomAccessContainer const & v, p); - - template - auto l1_distance(RandomAccessContainer const & u, RandomAccessContainer const & v); - - template - auto l2_distance(RandomAccessContainer const & u, RandomAccessContainer const & v); - - template - auto sup_distance(RandomAccessContainer const & u, RandomAccessContainer const & v); - template auto total_variation(ForwardIterator first, ForwardIterator last); - template - auto lanczos_noisy_derivative(RandomAccessContainer const & v, time_step, time); - - template - auto kurtosis(ForwardIterator first, ForwardIterator last); - - template - auto skewness(ForwardIterator first, ForwardIterator last); - - template - auto covariance(RandomAccessContainer const & u, RandomAccessContainer const & v); - - template - auto simpsons_rule_quadrature(ForwardIterator first, ForwardIterator last); - - template - auto simpsons_three_eighths_quadrature(ForwardIterator first, ForwardIterator last); - - template - auto booles_rule_quadrature(ForwardIterator first, ForwardIterator last); - - template - auto inner_product(RandomAccessContainer const & u, RandomAccessContainer const & v); - - }}} `` @@ -115,30 +75,30 @@ These accumulators should be used in real-time applications; `vector_functionals As a reminder, remember that to actually /get/ vectorization, compile with `-march=native -O3` flags. We now describe each functional in detail. +Our examples use `std::vector` to hold the data, but this not required. +In general, you can store your data in an Eigen array, and Armadillo vector, `std::array`, and for many of the routines, a `std::forward_list`. +These routines are usable float, double, long double, and Boost.Multiprecision precision, as well as their complex extensions whenever the computation is well-defined. 
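As a small illustration of this container genericity (a sketch using only containers already exercised by the unit tests; requires `<array>` and `<forward_list>`), the same call works on a `std::array` and a `std::forward_list`:

    std::array<double, 4> a{1, 2, 3, 4};
    double mu1 = boost::math::tools::mean(a.cbegin(), a.cend());
    // mu1 = 2.5

    std::forward_list<double> l{1, 2, 3, 4};
    double mu2 = boost::math::tools::mean(l.begin(), l.end());
    // mu2 = 2.5
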
[heading Mean] -Compute the mean of a container: - std::vector v{1,2,3,4,5}; - double mu = mean(v.begin(), v.end()); + double mu = boost::math::tools::mean(v.cbegin(), v.cend()); The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6a]. -The only requirement on the input is that it must be forward iterable, so you can use Eigen vectors, ublas vectors, Armadillo vectors, or a `std::forward_list` to hold your data. +The data is not modified and must be forward iterable. +Works with complex data. - -[heading Mean and Sample variance] - -Compute the mean and sample variance: +[heading Mean and Population Variance] std::vector v{1,2,3,4,5}; - auto [mu, s] = mean_and_sample_variance(v.begin(), v.end()); + auto [mu, s] = boost::math::tools::mean_and_population_variance(v.cbegin(), v.cend()); The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6b]. -Note that we do not provide computation of sample variance alone; -we are unaware of any one-pass, numerically stable computation of sample variance which does not simultaneously generate the mean. +Note that we do not provide computation of population variance alone; +we are unaware of any one-pass, numerically stable computation of population variance which does not simultaneously generate the mean. If the mean is not required, simply ignore it. The input datatype must be forward iterable and the range `[first, last)` must contain at least two elements. +It is /not/ in general sensible to pass complex numbers to this routine. [heading Median] @@ -151,28 +111,15 @@ Compute the median of a dataset: The calculation of the median is a thin wrapper around the C++11 [@https://en.cppreference.com/w/cpp/algorithm/nth_element nth-element]. Therefore, all requirements of `nth_element` are inherited by the median calculation. -[heading Sup norm] -Compute the sup norm of a dataset: - - std::vector v{-3, 2, 1}; - double sup = boost::math::tools::sup_norm(v.begin(), v.end()); - // sup = 3 - - std::vector> v{{0, -8}, {1,1}, {-3,2}}; - double sup = boost::math::tools::sup_norm(v.begin(), v.end()); - // sup = 8 - -Note how the calculation of \u2113[super p] norms can be performed in both real and complex arithmetic. - [heading Gini Coefficient] Compute the Gini coefficient of a dataset: std::vector v{1,0,0,0}; - double gini = gini_coefficient(v.begin(), v.end()); + double gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); // gini = 1, as v[0] holds all the "wealth" std::vector w{1,1,1,1}; - gini = gini_coefficient(w.begin(), w.end()); + gini = boost::math::tools::gini_coefficient(w.begin(), w.end()); // gini = 0, as all elements are now equal. /Nota bene: The input data is altered-in particular, it is sorted./ @@ -183,7 +130,7 @@ Hurley and Rickard's definition places the Gini coefficient in the range [0,1]; If you wish to convert the Boost Gini coefficient to the population Gini coefficient, multiply by (/n/-1)/ /n/. /Nota bene:/ There is essentially no reason to pass negative values to the Gini coefficient function. -However, since a single use case (measuring wealth inequality when some people have negative wealth) exists, we do not throw an exception when negative values are encountered. +However, a single use case (measuring wealth inequality when some people have negative wealth) exists, so we do not throw an exception when negative values are encountered. You should have /very/ good cause to pass negative values to the Gini coefficient calculator. 
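
To make the scaling conversion above concrete, here is a minimal sketch; the factor (/n/-1)//n/ is applied by hand and is not part of the library call:

    std::vector<double> v{1, 0, 0, 0};
    double n = v.size();
    double sample_gini = boost::math::tools::gini_coefficient(v.begin(), v.end());
    // sample_gini = 1 under the convention used here
    double population_gini = sample_gini*(n - 1)/n;
    // population_gini = 0.75, which lies in [0, 1 - 1/n]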
The Gini coefficient, first used to measure wealth inequality, is also one of the best measures of the sparsity of an expansion in a basis. @@ -191,26 +138,26 @@ A sparse expansion has most of its norm concentrated in just a few coefficients, However, for measuring sparsity, the phase of the numbers is irrelevant, so `absolute_gini_coefficient` should be used instead: std::vector> v{{0,1}, {0,0}, {0,0}, {0,0}}; - double abs_gini = absolute_gini_coefficient(v.begin(), v.end()); + double abs_gini = boost::math::tools::absolute_gini_coefficient(v.begin(), v.end()); // now abs_gini = 1 std::vector> w{{0,1}, {1,0}, {0,-1}, {-1,0}}; - double abs_gini = absolute_gini_coefficient(w.begin(), w.end()); + double abs_gini = boost::math::tools::absolute_gini_coefficient(w.begin(), w.end()); // now abs_gini = 0 std::vector u{-1, 1, -1}; - double abs_gini = absolute_gini_coefficient(u.begin(), u.end()); + double abs_gini = boost::math::tools::absolute_gini_coefficient(u.begin(), u.end()); // now abs_gini = 0 Again, Wikipedia denotes our scaling as a "sample Gini coefficient". We chose this scaling because it always returns unity for a vector which has only one nonzero coefficient. -If sorting the input data is to much expense for a sparsity measure (is it going to be perfect anyway?), -consider using `hoyer_sparsity`. +If sorting the input data is too much expense for a sparsity measure (is it going to be perfect anyway?), +consider calculating the Hoyer sparsity instead. [heading Hoyer Sparsity] -The Hoyer sparsity measure uses a normalized ratio of the \u2113[super 1] and \u2113[super 2] norms. +The Hoyer sparsity measures a normalized ratio of the \u2113[super 1] and \u2113[super 2] norms. As the name suggests, it is used to measure sparsity in an expansion in some basis. The Hoyer sparsity computes ([radic]/N/ - \u2113[super 1](v)/\u2113[super 2](v))/([radic]N -1). @@ -222,11 +169,79 @@ Usage: // hs = 1 +[heading \u2113[super \u221E] norm] + +Computes the supremum norm of a dataset: + + std::vector v{-3, 2, 1}; + double sup = boost::math::tools::sup_norm(v.cbegin(), v.cend()); + // sup = 3 + + std::vector> v{{0, -8}, {1,1}, {-3,2}}; + double sup = boost::math::tools::sup_norm(v.cbegin(), v.cend()); + // sup = 8 + +Supports both real and complex arithmetic. +Container must be forward iterable and is not modified. + [heading \u2113[super /p/] norm] + std::vector v{-8, 0, 0}; + double sup = boost::math::tools::lp_norm(v.cbegin(), v.cend(), 7); + // sup = 8 + + std::vector> v{{1, 0}, {0,1}, {0,-1}}; + double sup = boost::math::tools::sup_norm(v.cbegin(), v.cend(), 3); + // sup = cbrt(3) + +Supports both real and complex arithmetic. +Container must be forward iterable and is not modified. + +[heading \u2113[super 0] pseudo-norm] + +Counts the number of non-zero elements in a container. + + std::vector v{0,0,1}; + size_t count = boost::math::tools::l0_pseudo_norm(v.begin(), v.end()); + // count = 1 + +Supports real and complex numbers. +Container must be forward iterable and the contents are not modified. +Note that this measure is not robust against numerical noise. + +[heading \u2113[super 1] norm] + +The \u2113[super 1] norm is a special case of the \u2113[super /p/] norm, but is much faster: + + std::vector v{1,1,1}; + double l1 = boost::math::tools::l1_norm(v.begin(), v.end()); + // l1 = 3 + +Requires a forward iterable input, does not modify input data, and works with complex numbers. 
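As a brief sketch of the complex case (the values mirror those used in the unit tests; requires `<complex>`):

    std::vector<std::complex<double>> w{{1, 0}, {0, 1}, {0, -1}};
    double l1 = boost::math::tools::l1_norm(w.cbegin(), w.cend());
    // l1 = 3, since each entry has unit magnitude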
+ +[heading \u2113[super 2] norm] + +The \u2113[super 2] norm is again a special case of the \u2113[super /p/] norm, but is much faster: + + std::vector v{1,1,1}; + double l1 = boost::math::tools::l2_norm(v.begin(), v.end()); + // l1 = sqrt(3) + +Requires a forward iterable input, does not modify input data, and works with complex numbers. + +[heading Total Variation] + std::vector v{1,1,1}; + double tv = boost::math::tools::total_variation(v.begin(), v.end()); + // no variation in v, so tv = 0. + v = {0,1}; + double tv = boost::math::tools::total_variation(v.begin(), v.end()); + // variation is 1, so tv = 1. -[heading \u2113[super 0] norm] +The total variation only supports real numbers. +All the constituent operations to compute the total variation are well-defined for complex numbers, +but the computed result is not meaningful. +The input must be forward iterable, and is not modified. [heading References] diff --git a/include/boost/math/tools/vector_functionals.hpp b/include/boost/math/tools/vector_functionals.hpp index 2cc0526802..edd17815c2 100644 --- a/include/boost/math/tools/vector_functionals.hpp +++ b/include/boost/math/tools/vector_functionals.hpp @@ -11,6 +11,8 @@ #include #include #include +#include + /* * A set of tools for computing scalar quantities associated with lists of numbers. */ @@ -54,11 +56,37 @@ mean_and_population_variance(ForwardIterator first, ForwardIterator last) return std::make_pair(M, Q/(k-1)); } -template -auto median(ForwardIterator first, ForwardIterator last) +template +auto median(RandomAccessIterator first, RandomAccessIterator last) { - typedef typename std::remove_const())>::type>::type Real; - Real m = 0; + typedef typename std::remove_const())>::type>::type Real; + Real m = std::numeric_limits::quiet_NaN(); + size_t num_elems = std::distance(first, last); + if (num_elems & 1) + { + nth_element(first, first+num_elems, last); + } + else + { + nth_element(first, first+num_elems, last); + } + return m; +} + +template +auto median(RandomAccessIterator first, RandomAccessIterator last, Compare comp) +{ + typedef typename std::remove_const())>::type>::type Real; + Real m = std::numeric_limits::quiet_NaN(); + size_t num_elems = std::distance(first, last); + if (num_elems & 1) + { + nth_element(first, first+num_elems, last); + } + else + { + nth_element(first, first+num_elems, last); + } return m; } @@ -66,17 +94,18 @@ auto median(ForwardIterator first, ForwardIterator last) template auto total_variation(ForwardIterator first, ForwardIterator last) { + using std::abs; typedef typename std::remove_const())>::type>::type Real; BOOST_ASSERT_MSG(first != last && std::next(first) != last, "At least two samples are required to compute the total variation."); - Real tot = 0; + Real tv = 0; auto it = first; Real tmp = *it; while (++it != last) { - tot += abs(*it - tmp); + tv += abs(*it - tmp); tmp = *it; } - return tot; + return tv; } // Mallat, equation 10.4 uses the base-2 logarithm. 
@@ -103,14 +132,15 @@ auto sup_norm(ForwardIterator first, ForwardIterator last) BOOST_ASSERT_MSG(first != last, "At least one value is required to compute the sup norm."); typedef typename std::remove_const())>::type>::type RealOrComplex; using std::abs; - if constexpr (boost::is_complex::value) + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) { - auto it = max_element(first, last, [](RealOrComplex a, RealOrComplex b) { return abs(b) > abs(a); }); + auto it = std::max_element(first, last, [](RealOrComplex a, RealOrComplex b) { return abs(b) > abs(a); }); return abs(*it); } else { - auto pair = minmax_element(first, last); + auto pair = std::minmax_element(first, last); if (abs(*pair.first) > abs(*pair.second)) { return abs(*pair.first); @@ -123,7 +153,75 @@ auto sup_norm(ForwardIterator first, ForwardIterator last) } template -size_t l0_norm(ForwardIterator first, ForwardIterator last) +auto l1_norm(ForwardIterator first, ForwardIterator last) +{ + typedef typename std::remove_const())>::type>::type RealOrComplex; + using std::abs; + decltype(abs(*first)) l1 = 0; + for (auto it = first; it != last; ++it) + { + l1 += abs(*first); + } + return l1; +} + +template +auto l2_norm(ForwardIterator first, ForwardIterator last) +{ + typedef typename std::remove_const())>::type>::type RealOrComplex; + using std::abs; + using std::norm; + using std::sqrt; + using std::is_floating_point; + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + typedef typename RealOrComplex::value_type Real; + Real l2 = 0; + for (auto it = first; it != last; ++it) + { + l2 += norm(*it); + } + Real result = sqrt(l2); + if (!isfinite(result)) + { + Real a = sup_norm(first, last); + l2 = 0; + for (auto it = first; it != last; ++it) + { + l2 += norm(*it/a); + } + return a*sqrt(l2); + } + return result; + } + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + RealOrComplex l2 = 0; + for (auto it = first; it != last; ++it) + { + l2 += (*it)*(*it); + } + RealOrComplex result = sqrt(l2); + if (!isfinite(result)) + { + RealOrComplex a = sup_norm(first, last); + l2 = 0; + for (auto it = first; it != last; ++it) + { + RealOrComplex tmp = *it/a; + l2 += tmp*tmp; + } + return a*sqrt(l2); + } + return result; + } +} + + +template +size_t l0_pseudo_norm(ForwardIterator first, ForwardIterator last) { typedef typename std::remove_const())>::type>::type RealOrComplex; size_t count = 0; @@ -144,7 +242,8 @@ auto lp_norm(ForwardIterator first, ForwardIterator last, typename std::remove_c using std::is_floating_point; using std::isfinite; typedef typename std::remove_const())>::type>::type RealOrComplex; - if constexpr (boost::is_complex::value) + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) { BOOST_ASSERT_MSG(p.real() >= 0, "For p < 0, the lp norm is not a norm."); BOOST_ASSERT_MSG(p.imag() == 0, "For imaginary p, the lp norm is not a norm."); @@ -168,14 +267,17 @@ auto lp_norm(ForwardIterator first, ForwardIterator last, typename std::remove_c } return result; } - else if constexpr (is_floating_point::value) + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) { 
BOOST_ASSERT_MSG(p >= 0, "For p < 0, the lp norm is not a norm"); RealOrComplex lp = 0; + for (auto it = first; it != last; ++it) { lp += pow(abs(*it), p); } + RealOrComplex result = pow(lp, 1/p); if (!isfinite(result)) { @@ -254,7 +356,7 @@ auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) // The Hoyer sparsity measure is defined in: // https://arxiv.org/pdf/0811.4706.pdf template -auto hoyer_sparsity(ForwardIterator first, ForwardIterator last) +auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) { using std::abs; using std::sqrt; diff --git a/test/vector_functionals_test.cpp b/test/vector_functionals_test.cpp index c535ba7026..2ffef0d0c1 100644 --- a/test/vector_functionals_test.cpp +++ b/test/vector_functionals_test.cpp @@ -17,6 +17,15 @@ using boost::multiprecision::cpp_bin_float_50; using boost::multiprecision::cpp_complex_50; +/* + * Test checklist: + * 1) Does it work with multiprecision? + * 2) Does it work with .cbegin()/.cend() if the data is not altered? + * 3) Does it work with ublas and std::array? (Checking Eigen and Armadillo will make the CI system really unhappy.) + * 4) Does it work with std::forward_list if a forward iterator is all that is required? + * 5) Does it work with complex data if complex data is sensible? + */ + template void test_mean() { @@ -44,7 +53,7 @@ void test_mean() BOOST_TEST(abs(mu - 4) < tol); // Does it work with ublas vectors? - boost::numeric::ublas::vector w(7); + boost::numeric::ublas::vector w(7); for (size_t i = 0; i < w.size(); ++i) { w[i] = i+1; @@ -54,6 +63,17 @@ void test_mean() } +template +void test_complex_mean() +{ + typedef typename Complex::value_type Real; + Real tol = std::numeric_limits::epsilon(); + std::vector v{{0,1},{0,2},{0,3},{0,4},{0,5}}; + auto mu = boost::math::tools::mean(v.begin(), v.end()); + BOOST_TEST(abs(mu.imag() - 3) < tol); + BOOST_TEST(abs(mu.real()) < tol); +} + template void test_mean_and_population_variance() { @@ -64,29 +84,64 @@ void test_mean_and_population_variance() BOOST_TEST(abs(sigma_sq) < tol); std::vector u{1}; - auto [mu1, sigma1_sq] = boost::math::tools::mean_and_population_variance(u.begin(), u.end()); + auto [mu1, sigma1_sq] = boost::math::tools::mean_and_population_variance(u.cbegin(), u.cend()); BOOST_TEST(abs(mu1 - 1) < tol); BOOST_TEST(abs(sigma1_sq) < tol); - std::vector w{0,1,0,1,0,1,0,1}; + std::array w{0,1,0,1,0,1,0,1}; auto [mu2, sigma2_sq] = boost::math::tools::mean_and_population_variance(w.begin(), w.end()); BOOST_TEST(abs(mu2 - 1.0/2.0) < tol); BOOST_TEST(abs(sigma2_sq - 1.0/4.0) < tol); + } template void test_lp() { - Real tol = std::numeric_limits::epsilon(); - std::vector> v{{1,0}, {0,0}, {0,0}}; - Real l3 = boost::math::tools::lp_norm(v.begin(), v.end(), 3); + Real tol = 50*std::numeric_limits::epsilon(); + + std::array u{1,0,0}; + Real l3 = boost::math::tools::lp_norm(u.begin(), u.end(), 3); BOOST_TEST(abs(l3 - 1) < tol); - std::vector u{1,0,0}; - l3 = boost::math::tools::lp_norm(u.begin(), u.end(), 3); + u[0] = -8; + l3 = boost::math::tools::lp_norm(u.cbegin(), u.cend(), 3); + BOOST_TEST(abs(l3 - 8) < tol); + + std::vector v(500); + for (size_t i = 0; i < v.size(); ++i) { + v[i] = 7; + } + Real l8 = boost::math::tools::lp_norm(v.cbegin(), v.cend(), 8); + Real expected = 7*pow(v.size(), static_cast(1)/static_cast(8)); + BOOST_TEST(abs(l8 - expected) < tol*abs(expected)); + + // Does it work with ublas vectors? + // Does it handle the overflow of intermediates? 
+ boost::numeric::ublas::vector w(4); + Real bignum = sqrt(std::numeric_limits::max())/256; + for (size_t i = 0; i < w.size(); ++i) + { + w[i] = bignum; + } + Real l20 = boost::math::tools::lp_norm(w.cbegin(), w.cend(), 4); + expected = bignum*pow(w.size(), static_cast(1)/static_cast(4)); + BOOST_TEST(abs(l20 - expected) < tol*expected); +} + + +template +void test_complex_lp() +{ + typedef typename Complex::value_type Real; + Real tol = std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,0}, {0,0}}; + Real l3 = boost::math::tools::lp_norm(v.cbegin(), v.cend(), 3); BOOST_TEST(abs(l3 - 1) < tol); + } + template void test_total_variation() { @@ -123,11 +178,19 @@ void test_sup_norm() Real s = boost::math::tools::sup_norm(v.begin(), v.end()); BOOST_TEST(abs(s - 2) < tol); - std::vector> w{{0,-8}, {1,1}, {3,2}}; - s = boost::math::tools::sup_norm(w.begin(), w.end()); +} + +template +void test_complex_sup_norm() +{ + typedef typename Complex::value_type Real; + Real tol = std::numeric_limits::epsilon(); + std::vector w{{0,-8}, {1,1}, {3,2}}; + Real s = boost::math::tools::sup_norm(w.cbegin(), w.cend()); BOOST_TEST(abs(s-8) < tol); } + template void test_gini_coefficient() { @@ -167,9 +230,31 @@ void test_hoyer_sparsity() v[2] = 1; hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); BOOST_TEST(abs(hs) < tol); +} + +template +void test_complex_hoyer_sparsity() +{ + typedef typename Complex::value_type Real; + using std::sqrt; + Real tol = 5*std::numeric_limits::epsilon(); + std::vector v{{0,1}, {0, 0}, {0,0}}; + Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); + BOOST_TEST(abs(hs - 1) < tol); + + // Does it work with constant iterators? + hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); + BOOST_TEST(abs(hs - 1) < tol); + // All are the same magnitude: + v[0] = {0, 1}; + v[1] = {1, 0}; + v[2] = {0,-1}; + hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); + BOOST_TEST(abs(hs) < tol); } + template void test_absolute_gini_coefficient() { @@ -197,18 +282,75 @@ void test_absolute_gini_coefficient() } template -void test_l0_norm() +void test_l0_pseudo_norm() { std::vector v{0,0,1}; - size_t count = boost::math::tools::l0_norm(v.begin(), v.end()); + size_t count = boost::math::tools::l0_pseudo_norm(v.begin(), v.end()); BOOST_TEST_EQ(count, 1); // Compiles with cbegin()/cend()? 
- count = boost::math::tools::l0_norm(v.cbegin(), v.cend()); + count = boost::math::tools::l0_pseudo_norm(v.cbegin(), v.cend()); + BOOST_TEST_EQ(count, 1); + +} + +template +void test_complex_l0_pseudo_norm() +{ + std::vector v{{0,0}, {0,0}, {1,0}}; + size_t count = boost::math::tools::l0_pseudo_norm(v.begin(), v.end()); BOOST_TEST_EQ(count, 1); +} + +template +void test_l1_norm() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + Real l1 = boost::math::tools::l1_norm(v.begin(), v.end()); + BOOST_TEST(abs(l1 - 3) < tol); +} + +template +void test_complex_l1_norm() +{ + typedef typename Complex::value_type Real; + Real tol = std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,1},{0,-1}}; + Real l1 = boost::math::tools::l1_norm(v.begin(), v.end()); + BOOST_TEST(abs(l1 - 3) < tol); +} + +template +void test_l2_norm() +{ + using std::sqrt; + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1}; + Real l2 = boost::math::tools::l2_norm(v.begin(), v.end()); + BOOST_TEST(abs(l2 - 2) < tol); + + Real bignum = 4*sqrt(std::numeric_limits::max()); + v[0] = bignum; + v[1] = 0; + v[2] = 0; + v[3] = 0; + l2 = boost::math::tools::l2_norm(v.begin(), v.end()); + BOOST_TEST(abs(l2 - bignum) < tol*l2); +} +template +void test_complex_l2_norm() +{ + using std::sqrt; + typedef typename Complex::value_type Real; + Real tol = 100*std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,1},{0,-1}, {1,0}}; + Real l2 = boost::math::tools::l2_norm(v.begin(), v.end()); + BOOST_TEST(abs(l2 - 2) < tol); } + int main() { test_mean(); @@ -216,6 +358,9 @@ int main() test_mean(); test_mean(); + test_complex_mean>(); + test_complex_mean(); + test_mean_and_population_variance(); test_mean_and_population_variance(); test_mean_and_population_variance(); @@ -224,31 +369,77 @@ int main() test_lp(); test_lp(); test_lp(); + test_lp(); - test_total_variation(); - test_total_variation(); - test_total_variation(); - test_total_variation(); + test_complex_lp>(); + test_complex_lp>(); + test_complex_lp>(); + test_complex_lp(); test_sup_norm(); test_sup_norm(); test_sup_norm(); + test_sup_norm(); + + test_complex_sup_norm>(); + test_complex_sup_norm>(); + test_complex_sup_norm>(); + test_complex_sup_norm(); + + test_l0_pseudo_norm(); + test_l0_pseudo_norm(); + test_l0_pseudo_norm(); + test_l0_pseudo_norm(); + + test_complex_l0_pseudo_norm>(); + test_complex_l0_pseudo_norm>(); + test_complex_l0_pseudo_norm>(); + test_complex_l0_pseudo_norm(); + + test_l1_norm(); + test_l1_norm(); + test_l1_norm(); + test_l1_norm(); + + test_complex_l2_norm>(); + test_complex_l2_norm>(); + test_complex_l2_norm>(); + test_complex_l2_norm(); + + test_l2_norm(); + test_l2_norm(); + test_l2_norm(); + test_l2_norm(); + + test_complex_l1_norm>(); + test_complex_l1_norm>(); + test_complex_l1_norm>(); + test_complex_l1_norm(); + + test_total_variation(); + test_total_variation(); + test_total_variation(); + test_total_variation(); test_gini_coefficient(); test_gini_coefficient(); test_gini_coefficient(); + test_gini_coefficient(); test_absolute_gini_coefficient(); test_absolute_gini_coefficient(); test_absolute_gini_coefficient(); + test_absolute_gini_coefficient(); test_hoyer_sparsity(); test_hoyer_sparsity(); test_hoyer_sparsity(); + test_hoyer_sparsity(); - test_l0_norm(); - test_l0_norm(); - test_l0_norm(); + test_complex_hoyer_sparsity>(); + test_complex_hoyer_sparsity>(); + test_complex_hoyer_sparsity>(); + test_complex_hoyer_sparsity(); return boost::report_errors(); } From 
bce92d78bafc3765df28f757dd9fd814cd680a3f Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Fri, 7 Dec 2018 18:44:40 -0700 Subject: [PATCH 04/46] Add Shannon entropy and kick off build. --- doc/vector_functionals/vector_functionals.qbk | 42 +++-- .../boost/math/tools/vector_functionals.hpp | 42 ++--- test/Jamfile.v2 | 1 + test/vector_functionals_test.cpp | 151 +++++++++++++++++- 4 files changed, 208 insertions(+), 28 deletions(-) diff --git a/doc/vector_functionals/vector_functionals.qbk b/doc/vector_functionals/vector_functionals.qbk index 60031149d9..f8e8699864 100644 --- a/doc/vector_functionals/vector_functionals.qbk +++ b/doc/vector_functionals/vector_functionals.qbk @@ -27,9 +27,6 @@ namespace boost{ namespace math{ namespace tools { template auto absolute_median(ForwardIterator first, ForwardIterator last); - template - auto shannon_entropy(ForwardIterator first, ForwardIterator last); - template auto gini_coefficient(ForwardIterator first, ForwardIterator last); @@ -40,7 +37,7 @@ namespace boost{ namespace math{ namespace tools { auto hoyer_sparsity(ForwardIterator first, ForwardIterator last); template - auto lp_norm(ForwardIterator first, ForwardIterator last, p); + auto shannon_entropy(ForwardIterator first, ForwardIterator last); template auto l0_pseudo_norm(ForwardIterator first, ForwardIterator last); @@ -54,6 +51,9 @@ namespace boost{ namespace math{ namespace tools { template auto sup_norm(ForwardIterator first, ForwardIterator last); + template + auto lp_norm(ForwardIterator first, ForwardIterator last, p); + template auto total_variation(ForwardIterator first, ForwardIterator last); @@ -111,6 +111,16 @@ Compute the median of a dataset: The calculation of the median is a thin wrapper around the C++11 [@https://en.cppreference.com/w/cpp/algorithm/nth_element nth-element]. Therefore, all requirements of `nth_element` are inherited by the median calculation. +[heading Absolute Median] + +The absolute median is used in signal processing, where the median of the magnitude of the coefficients in some expansion are used to estimate noise variance. +See [@https://wavelet-tour.github.io/ Mallat] for details. +The absolute median supports both real and complex arithmetic, modifies its input, and requires random access iterators. + + std::vector v{-1, 1}; + double m = boost::math::tools::absolute_median(v.begin(), v.end()); + // m = 1 + [heading Gini Coefficient] Compute the Gini coefficient of a dataset: @@ -126,8 +136,8 @@ Compute the Gini coefficient of a dataset: /Nota bene:/ Different authors use different conventions regarding the overall scale of the Gini coefficient. We have chosen to follow [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard's definition], which [@https://en.wikipedia.org/wiki/Gini_coefficient Wikipedia] calls a "sample Gini coefficient". -Hurley and Rickard's definition places the Gini coefficient in the range [0,1]; Wikipedia's population Gini coefficient is in the range [0, 1 - 1/N]. -If you wish to convert the Boost Gini coefficient to the population Gini coefficient, multiply by (/n/-1)/ /n/. +Hurley and Rickard's definition places the Gini coefficient in the range [0,1]; Wikipedia's population Gini coefficient is in the range [0, 1 - 1//n/]. +If you wish to convert the Boost Gini coefficient to the population Gini coefficient, multiply by (/n/-1)//n/. /Nota bene:/ There is essentially no reason to pass negative values to the Gini coefficient function. 
However, a single use case (measuring wealth inequality when some people have negative wealth) exists, so we do not throw an exception when negative values are encountered. @@ -161,13 +171,18 @@ The Hoyer sparsity measures a normalized ratio of the \u2113[super 1] and \u2113 As the name suggests, it is used to measure sparsity in an expansion in some basis. The Hoyer sparsity computes ([radic]/N/ - \u2113[super 1](v)/\u2113[super 2](v))/([radic]N -1). +For details, see [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard]. Usage: std::vector v{1,0,0}; Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); // hs = 1 + std::vector v{1,-1,1}; + Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); + // hs = 0 +The container must be forward iterable and the contents are not modified. [heading \u2113[super \u221E] norm] @@ -195,7 +210,7 @@ Container must be forward iterable and is not modified. // sup = cbrt(3) Supports both real and complex arithmetic. -Container must be forward iterable and is not modified. +The container must be forward iterable and the contents are not modified. [heading \u2113[super 0] pseudo-norm] @@ -206,7 +221,7 @@ Counts the number of non-zero elements in a container. // count = 1 Supports real and complex numbers. -Container must be forward iterable and the contents are not modified. +The container must be forward iterable and the contents are not modified. Note that this measure is not robust against numerical noise. [heading \u2113[super 1] norm] @@ -241,7 +256,16 @@ Requires a forward iterable input, does not modify input data, and works with co The total variation only supports real numbers. All the constituent operations to compute the total variation are well-defined for complex numbers, but the computed result is not meaningful. -The input must be forward iterable, and is not modified. +The container must be forward iterable, and the contents are not modified. + +[heading Shannon Entropy] + + std::vector v{1/2.0, 1/2.0}; + double Hs = boost::math::tools::shannon_entropy(v.begin(), v.end()); + // Hs = ln(2). + +The Shannon entropy only supports non-negative real-valued inputs, presumably for interpretational purposes in the range [0,1]-though this is not enforced. +The natural logarithm is used to compute the Shannon entropy; all other "Shannon entropies" are readily obtained by change of log base. 
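For reference, the accumulation added in this patch skips zero entries, consistent with the convention that /x/ ln /x/ \u2192 0 as /x/ \u2192 0, so zeros in the input are harmless. In outline:

    // Sketch of the accumulation performed by shannon_entropy:
    // natural logarithm; terms with *it == 0 contribute nothing.
    Real entropy = 0;
    for (auto it = first; it != last; ++it)
    {
        if (*it != 0)
        {
            entropy += (*it)*log(*it);
        }
    }
    return -entropy;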
[heading References] diff --git a/include/boost/math/tools/vector_functionals.hpp b/include/boost/math/tools/vector_functionals.hpp index edd17815c2..eb56cbfbea 100644 --- a/include/boost/math/tools/vector_functionals.hpp +++ b/include/boost/math/tools/vector_functionals.hpp @@ -59,35 +59,44 @@ mean_and_population_variance(ForwardIterator first, ForwardIterator last) template auto median(RandomAccessIterator first, RandomAccessIterator last) { - typedef typename std::remove_const())>::type>::type Real; - Real m = std::numeric_limits::quiet_NaN(); size_t num_elems = std::distance(first, last); + BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined."); if (num_elems & 1) { - nth_element(first, first+num_elems, last); + auto middle = first + (num_elems - 1)/2; + nth_element(first, middle, last); + return *middle; } else { - nth_element(first, first+num_elems, last); + auto middle = first + num_elems/2 - 1; + nth_element(first, middle, last); + nth_element(middle, middle+1, last); + return (*middle + *(middle+1))/2; } - return m; } -template -auto median(RandomAccessIterator first, RandomAccessIterator last, Compare comp) +template +auto absolute_median(RandomAccessIterator first, RandomAccessIterator last) { - typedef typename std::remove_const())>::type>::type Real; - Real m = std::numeric_limits::quiet_NaN(); + using std::abs; + typedef typename std::remove_const())>::type>::type RealOrComplex; size_t num_elems = std::distance(first, last); + BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined."); + auto comparator = [](RealOrComplex a, RealOrComplex b) { return abs(a) < abs(b);}; if (num_elems & 1) { - nth_element(first, first+num_elems, last); + auto middle = first + (num_elems - 1)/2; + nth_element(first, middle, last, comparator); + return abs(*middle); } else { - nth_element(first, first+num_elems, last); + auto middle = first + num_elems/2 - 1; + nth_element(first, middle, last, comparator); + nth_element(middle, middle+1, last, comparator); + return (abs(*middle) + abs(*(middle+1)))/abs(static_cast(2)); } - return m; } // Mallat, "A Wavelet Tour of Signal Processing", equation 2.60: @@ -113,14 +122,13 @@ template auto shannon_entropy(ForwardIterator first, ForwardIterator last) { typedef typename std::remove_const())>::type>::type Real; - using std::log2; + using std::log; Real entropy = 0; for (auto it = first; it != last; ++it) { - Real tmp = *it; - if (tmp != 0) + if (*it != 0) { - entropy += tmp*log2(tmp); + entropy += (*it)*log(*it); } } return -entropy; @@ -155,7 +163,6 @@ auto sup_norm(ForwardIterator first, ForwardIterator last) template auto l1_norm(ForwardIterator first, ForwardIterator last) { - typedef typename std::remove_const())>::type>::type RealOrComplex; using std::abs; decltype(abs(*first)) l1 = 0; for (auto it = first; it != last; ++it) @@ -360,7 +367,6 @@ auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) { using std::abs; using std::sqrt; - typedef typename std::remove_const())>::type>::type RealOrComplex; BOOST_ASSERT_MSG(first != last, "Computation of the Hoyer sparsity requires at least one sample."); decltype(abs(*first)) l1 = 0; diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 911572d661..74a2a85a07 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -902,6 +902,7 @@ test-suite misc : [ run test_constant_generate.cpp : : : release USE_CPP_FLOAT=1 off:no ] [ run test_cubic_b_spline.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_smart_ptr 
cxx11_defaulted_functions ] off msvc:/bigobj release ] [ run catmull_rom_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] # does not in fact require C++17 constexpr; requires C++17 std::size. + [ run vector_functionals_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] [ run test_real_concept.cpp ../../test/build//boost_unit_test_framework ] [ run test_remez.cpp pch ../../test/build//boost_unit_test_framework ] [ run test_roots.cpp pch ../../test/build//boost_unit_test_framework ] diff --git a/test/vector_functionals_test.cpp b/test/vector_functionals_test.cpp index 2ffef0d0c1..01664d2031 100644 --- a/test/vector_functionals_test.cpp +++ b/test/vector_functionals_test.cpp @@ -8,8 +8,11 @@ #include #include #include +#include +#include #include #include +#include #include #include #include @@ -92,9 +95,124 @@ void test_mean_and_population_variance() auto [mu2, sigma2_sq] = boost::math::tools::mean_and_population_variance(w.begin(), w.end()); BOOST_TEST(abs(mu2 - 1.0/2.0) < tol); BOOST_TEST(abs(sigma2_sq - 1.0/4.0) < tol); +} +template +void test_median() +{ + std::mt19937 g(12); + std::vector v{1,2,3,4,5,6,7}; + + Real m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 4); + + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 4); + + v = {1,2,3,3,4,5}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 3); + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 3); + + v = {1}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); + + v = {1,1}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); + + v = {2,4}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 3); + + v = {1,1,1}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); + + v = {1,2,3}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 2); + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 2); } +template +void test_absolute_median() +{ + std::mt19937 g(12); + std::vector v{-1, 2, -3, 4, -5, 6, -7}; + + Real m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 4); + + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 4); + + v = {1, -2, -3, 3, -4, -5}; + m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 3); + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 3); + + v = {-1}; + m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); + + v = {-1, 1}; + m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); + + v = {2, -4}; + m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 3); + + v = {1, -1, 1}; + m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); + + v = {1, 2, -3}; + m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 2); + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 2); +} + + +template +void test_complex_absolute_median() +{ + typedef typename Complex::value_type Real; + std::mt19937 g(18); + 
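+ // For complex input, absolute_median orders the elements by modulus |z| and returns a real
+ // magnitude, so the expected values below are the moduli of the middle elements.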
std::vector v{{0,1}, {0,-2},{0,3}, {0,-4}, {0,5}, {0,-6}, {0,7}}; + + Real m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 4); + + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 4); + + v = {{0,1}, {0,-2}, {0,-3}, {0,3}, {0,4}, {0,-5}}; + m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 3); + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 3); + + v = {{0, -1}}; + m = boost::math::tools::absolute_median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); +} + + template void test_lp() { @@ -335,7 +453,7 @@ void test_l2_norm() v[1] = 0; v[2] = 0; v[3] = 0; - l2 = boost::math::tools::l2_norm(v.begin(), v.end()); + l2 = boost::math::tools::l2_norm(v.begin(), v.end()); BOOST_TEST(abs(l2 - bignum) < tol*l2); } @@ -350,6 +468,18 @@ void test_complex_l2_norm() BOOST_TEST(abs(l2 - 2) < tol); } +template +void test_shannon_entropy() +{ + Real tol = 100*std::numeric_limits::epsilon(); + using boost::math::constants::half; + using boost::math::constants::ln_two; + std::vector v(30, half()); + Real Hs = boost::math::tools::shannon_entropy(v.begin(), v.end()); + Real expected = v.size()*ln_two()/2; + BOOST_TEST(abs(Hs - expected) < tol*expected); +} + int main() { @@ -366,6 +496,21 @@ int main() test_mean_and_population_variance(); test_mean_and_population_variance(); + test_median(); + test_median(); + test_median(); + test_median(); + + test_absolute_median(); + test_absolute_median(); + test_absolute_median(); + test_absolute_median(); + + test_complex_absolute_median>(); + test_complex_absolute_median>(); + test_complex_absolute_median>(); + test_complex_absolute_median(); + test_lp(); test_lp(); test_lp(); @@ -436,6 +581,10 @@ int main() test_hoyer_sparsity(); test_hoyer_sparsity(); + test_shannon_entropy(); + test_shannon_entropy(); + test_shannon_entropy(); + test_complex_hoyer_sparsity>(); test_complex_hoyer_sparsity>(); test_complex_hoyer_sparsity>(); From 5a0ff712bd317d1a4ac5fd7292d6f1579e428f07 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Sat, 8 Dec 2018 11:11:35 -0700 Subject: [PATCH 05/46] Add range calls. 
[CI SKIP] --- doc/vector_functionals/vector_functionals.qbk | 65 +++++++++- .../boost/math/tools/vector_functionals.hpp | 114 +++++++++++++++++- test/vector_functionals_test.cpp | 73 ++++++++++- 3 files changed, 246 insertions(+), 6 deletions(-) diff --git a/doc/vector_functionals/vector_functionals.qbk b/doc/vector_functionals/vector_functionals.qbk index f8e8699864..b7394f1999 100644 --- a/doc/vector_functionals/vector_functionals.qbk +++ b/doc/vector_functionals/vector_functionals.qbk @@ -15,45 +15,93 @@ namespace boost{ namespace math{ namespace tools { + template + auto mean(Container const & c); + template auto mean(ForwardIterator first, ForwardIterator last); + template + auto mean_and_population_variance(Container const & c); + template - auto mean_and_variance(ForwardIterator first, ForwardIterator last); + auto mean_and_population_variance(ForwardIterator first, ForwardIterator last); + + template + auto median(Container & c); template auto median(ForwardIterator first, ForwardIterator last); + template + auto absolute_median(Container & c); + template auto absolute_median(ForwardIterator first, ForwardIterator last); + template + auto gini_coefficient(Container & c); + template auto gini_coefficient(ForwardIterator first, ForwardIterator last); + template + auto absolute_gini_coefficient(Container & c); + template auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last); + template + auto hoyer_sparsity(Container const & c); + template auto hoyer_sparsity(ForwardIterator first, ForwardIterator last); + template + auto shannon_entropy(Container const & c); + template auto shannon_entropy(ForwardIterator first, ForwardIterator last); + template + auto shannon_cost(Container const & c); + + template + auto shannon_cost(ForwardIterator first, ForwardIterator last); + + template + auto l0_pseudo_norm(Container const & c); + template auto l0_pseudo_norm(ForwardIterator first, ForwardIterator last); + template + auto l1_norm(Container const & c); + template auto l1_norm(ForwardIterator first, ForwardIterator last); + template + auto l2_norm(Container const & c); + template auto l2_norm(ForwardIterator first, ForwardIterator last); + template + auto sup_norm(Container const & c); + template auto sup_norm(ForwardIterator first, ForwardIterator last); + template + auto lp_norm(Container const & c); + template auto lp_norm(ForwardIterator first, ForwardIterator last, p); + template + auto total_variation(Container const & c); + template auto total_variation(ForwardIterator first, ForwardIterator last); @@ -83,6 +131,8 @@ These routines are usable float, double, long double, and Boost.Multiprecision p std::vector v{1,2,3,4,5}; double mu = boost::math::tools::mean(v.cbegin(), v.cend()); + // Alternative syntax if you want to use entire container: + double mu = boost::math::tools::mean(v); The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6a]. The data is not modified and must be forward iterable. @@ -93,6 +143,11 @@ Works with complex data. std::vector v{1,2,3,4,5}; auto [mu, s] = boost::math::tools::mean_and_population_variance(v.cbegin(), v.cend()); +If you don't need to calculate on a subset of the input, then the range call is more terse: + + std::vector v{1,2,3,4,5}; + auto [mu, s] = boost::math::tools::mean_and_population_variance(v); + The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6b]. 
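In outline, the single pass maintains a running mean /M/ and a running sum of squared deviations /Q/; a sketch of the update, with equation numbers referring to Higham:

    // Sketch of the one-pass update behind mean_and_population_variance.
    Real M = *first;   // running mean
    Real Q = 0;        // running sum of squared deviations
    Real k = 2;
    for (auto it = first + 1; it != last; ++it)
    {
        Real tmp = *it - M;
        Q = Q + ((k - 1)*tmp*tmp)/k;   // Higham 1.6b
        M = M + tmp/k;                 // Higham 1.6a
        k += 1;
    }
    // mean = M, population variance = Q/(k - 1), i.e. Q divided by the number of samples.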
Note that we do not provide computation of population variance alone; we are unaware of any one-pass, numerically stable computation of population variance which does not simultaneously generate the mean. @@ -268,6 +323,14 @@ The Shannon entropy only supports non-negative real-valued inputs, presumably fo The natural logarithm is used to compute the Shannon entropy; all other "Shannon entropies" are readily obtained by change of log base. +[heading Shannon Cost] + + std::vector v{-1, 1,-1}; + double Ks = boost::math::tools::shannon_cost(v.begin(), v.end()); + // Ks = 0; concentration of the vector is minimized. + + + [heading References] * Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002. diff --git a/include/boost/math/tools/vector_functionals.hpp b/include/boost/math/tools/vector_functionals.hpp index eb56cbfbea..e611b12566 100644 --- a/include/boost/math/tools/vector_functionals.hpp +++ b/include/boost/math/tools/vector_functionals.hpp @@ -35,6 +35,12 @@ mean(ForwardIterator first, ForwardIterator last) return mu; } +template +inline auto mean(Container const & v) +{ + return mean(v.cbegin(), v.cend()); +} + template auto mean_and_population_variance(ForwardIterator first, ForwardIterator last) @@ -56,6 +62,12 @@ mean_and_population_variance(ForwardIterator first, ForwardIterator last) return std::make_pair(M, Q/(k-1)); } +template +inline auto mean_and_population_variance(Container const & v) +{ + return mean_and_population_variance(v.cbegin(), v.cend()); +} + template auto median(RandomAccessIterator first, RandomAccessIterator last) { @@ -76,6 +88,14 @@ auto median(RandomAccessIterator first, RandomAccessIterator last) } } + +template +inline auto median(RandomAccessContainer & v) +{ + return median(v.begin(), v.end()); +} + + template auto absolute_median(RandomAccessIterator first, RandomAccessIterator last) { @@ -99,6 +119,12 @@ auto absolute_median(RandomAccessIterator first, RandomAccessIterator last) } } +template +inline auto absolute_median(RandomAccessContainer & v) +{ + return absolute_median(v.begin(), v.end()); +} + // Mallat, "A Wavelet Tour of Signal Processing", equation 2.60: template auto total_variation(ForwardIterator first, ForwardIterator last) @@ -117,7 +143,13 @@ auto total_variation(ForwardIterator first, ForwardIterator last) return tv; } -// Mallat, equation 10.4 uses the base-2 logarithm. 
+template +inline auto total_variation(Container const & v) +{ + return total_variation(v.cbegin(), v.cend()); +} + + template auto shannon_entropy(ForwardIterator first, ForwardIterator last) { @@ -134,6 +166,36 @@ auto shannon_entropy(ForwardIterator first, ForwardIterator last) return -entropy; } +template +inline auto shannon_entropy(Container const & v) +{ + return shannon_entropy(v.cbegin(), v.cend()); +} + +template +auto shannon_cost(ForwardIterator first, ForwardIterator last) +{ + typedef typename std::remove_const())>::type>::type Real; + using std::log; + Real cost = 0; + for (auto it = first; it != last; ++it) + { + if (*it != 0) + { + Real tmp = abs(*it); + cost += tmp*tmp*log(tmp*tmp); + } + } + return -cost; +} + +template +inline auto shannon_cost(Container const & v) +{ + return shannon_cost(v.cbegin(), v.cend()); +} + + template auto sup_norm(ForwardIterator first, ForwardIterator last) { @@ -160,6 +222,12 @@ auto sup_norm(ForwardIterator first, ForwardIterator last) } } +template +inline auto sup_norm(Container const & v) +{ + return sup_norm(v.cbegin(), v.cend()); +} + template auto l1_norm(ForwardIterator first, ForwardIterator last) { @@ -172,6 +240,13 @@ auto l1_norm(ForwardIterator first, ForwardIterator last) return l1; } +template +inline auto l1_norm(Container const & v) +{ + return l1_norm(v.cbegin(), v.cend()); +} + + template auto l2_norm(ForwardIterator first, ForwardIterator last) { @@ -226,6 +301,11 @@ auto l2_norm(ForwardIterator first, ForwardIterator last) } } +template +inline auto l2_norm(Container const & v) +{ + return l2_norm(v.cbegin(), v.cend()); +} template size_t l0_pseudo_norm(ForwardIterator first, ForwardIterator last) @@ -242,6 +322,12 @@ size_t l0_pseudo_norm(ForwardIterator first, ForwardIterator last) return count; } +template +inline size_t l0_pseudo_norm(Container const & v) +{ + return l0_pseudo_norm(v.cbegin(), v.cend()); +} + template auto lp_norm(ForwardIterator first, ForwardIterator last, typename std::remove_const())>::type>::type p) { @@ -304,6 +390,13 @@ auto lp_norm(ForwardIterator first, ForwardIterator last, typename std::remove_c } } +template +inline auto lp_norm(Container const & v, typename Container::value_type p) +{ + return lp_norm(v.cbegin(), v.cend(), p); +} + + template auto gini_coefficient(ForwardIterator first, ForwardIterator last) { @@ -330,6 +423,12 @@ auto gini_coefficient(ForwardIterator first, ForwardIterator last) return ((2*num)/denom - i)/(i-2); } +template +inline auto gini_coefficient(RandomAccessContainer & v) +{ + return gini_coefficient(v.begin(), v.end()); +} + template auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) @@ -360,6 +459,12 @@ auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) return ((2*num)/denom - i)/(i-2); } +template +inline auto absolute_gini_coefficient(RandomAccessContainer & v) +{ + return absolute_gini_coefficient(v.begin(), v.end()); +} + // The Hoyer sparsity measure is defined in: // https://arxiv.org/pdf/0811.4706.pdf template @@ -383,5 +488,12 @@ auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) return (rootn - l1/sqrt(l2) )/ (rootn - 1); } +template +inline auto hoyer_sparsity(Container const & v) +{ + return hoyer_sparsity(v.cbegin(), v.cend()); +} + + }}} #endif diff --git a/test/vector_functionals_test.cpp b/test/vector_functionals_test.cpp index 01664d2031..58254be8ff 100644 --- a/test/vector_functionals_test.cpp +++ b/test/vector_functionals_test.cpp @@ -37,6 +37,11 @@ void test_mean() 
Real mu = boost::math::tools::mean(v.begin(), v.end()); BOOST_TEST(abs(mu - 3) < tol); + // Does range call work? + mu = boost::math::tools::mean(v); + BOOST_TEST(abs(mu - 3) < tol); + + // Can we successfully average only part of the vector? mu = boost::math::tools::mean(v.begin(), v.begin() + 3); BOOST_TEST(abs(mu - 2) < tol); @@ -75,6 +80,11 @@ void test_complex_mean() auto mu = boost::math::tools::mean(v.begin(), v.end()); BOOST_TEST(abs(mu.imag() - 3) < tol); BOOST_TEST(abs(mu.real()) < tol); + + // Does range work? + mu = boost::math::tools::mean(v); + BOOST_TEST(abs(mu.imag() - 3) < tol); + BOOST_TEST(abs(mu.real()) < tol); } template @@ -95,6 +105,11 @@ void test_mean_and_population_variance() auto [mu2, sigma2_sq] = boost::math::tools::mean_and_population_variance(w.begin(), w.end()); BOOST_TEST(abs(mu2 - 1.0/2.0) < tol); BOOST_TEST(abs(sigma2_sq - 1.0/4.0) < tol); + + auto [mu3, sigma3_sq] = boost::math::tools::mean_and_population_variance(w); + BOOST_TEST(abs(mu3 - 1.0/2.0) < tol); + BOOST_TEST(abs(sigma3_sq - 1.0/4.0) < tol); + } template @@ -107,7 +122,8 @@ void test_median() BOOST_TEST_EQ(m, 4); std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::median(v.begin(), v.end()); + // Does range call work? + m = boost::math::tools::median(v); BOOST_TEST_EQ(m, 4); v = {1,2,3,3,4,5}; @@ -151,7 +167,7 @@ void test_absolute_median() BOOST_TEST_EQ(m, 4); std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::absolute_median(v.begin(), v.end()); + m = boost::math::tools::absolute_median(v); BOOST_TEST_EQ(m, 4); v = {1, -2, -3, 3, -4, -5}; @@ -197,7 +213,7 @@ void test_complex_absolute_median() BOOST_TEST_EQ(m, 4); std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::absolute_median(v.begin(), v.end()); + m = boost::math::tools::absolute_median(v); BOOST_TEST_EQ(m, 4); v = {{0,1}, {0,-2}, {0,-3}, {0,3}, {0,4}, {0,-5}}; @@ -230,7 +246,7 @@ void test_lp() for (size_t i = 0; i < v.size(); ++i) { v[i] = 7; } - Real l8 = boost::math::tools::lp_norm(v.cbegin(), v.cend(), 8); + Real l8 = boost::math::tools::lp_norm(v, 8); Real expected = 7*pow(v.size(), static_cast(1)/static_cast(8)); BOOST_TEST(abs(l8 - expected) < tol*abs(expected)); @@ -257,6 +273,9 @@ void test_complex_lp() Real l3 = boost::math::tools::lp_norm(v.cbegin(), v.cend(), 3); BOOST_TEST(abs(l3 - 1) < tol); + l3 = boost::math::tools::lp_norm(v, 3); + BOOST_TEST(abs(l3 - 1) < tol); + } @@ -268,6 +287,9 @@ void test_total_variation() Real tv = boost::math::tools::total_variation(v.begin(), v.end()); BOOST_TEST(tv >= 0 && abs(tv) < tol); + tv = boost::math::tools::total_variation(v); + BOOST_TEST(tv >= 0 && abs(tv) < tol); + v[1] = 2; tv = boost::math::tools::total_variation(v.begin(), v.end()); BOOST_TEST(abs(tv - 1) < tol); @@ -296,6 +318,9 @@ void test_sup_norm() Real s = boost::math::tools::sup_norm(v.begin(), v.end()); BOOST_TEST(abs(s - 2) < tol); + s = boost::math::tools::sup_norm(v); + BOOST_TEST(abs(s - 2) < tol); + } template @@ -306,6 +331,9 @@ void test_complex_sup_norm() std::vector w{{0,-8}, {1,1}, {3,2}}; Real s = boost::math::tools::sup_norm(w.cbegin(), w.cend()); BOOST_TEST(abs(s-8) < tol); + + s = boost::math::tools::sup_norm(w); + BOOST_TEST(abs(s-8) < tol); } @@ -317,6 +345,9 @@ void test_gini_coefficient() Real gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini - 1) < tol); + gini = boost::math::tools::gini_coefficient(v); + BOOST_TEST(abs(gini - 1) < tol); + v[0] = 1; v[1] = 1; v[2] = 1; @@ -339,6 +370,9 @@ void test_hoyer_sparsity() Real hs = 
boost::math::tools::hoyer_sparsity(v.begin(), v.end()); BOOST_TEST(abs(hs - 1) < tol); + hs = boost::math::tools::hoyer_sparsity(v); + BOOST_TEST(abs(hs - 1) < tol); + // Does it work with constant iterators? hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); BOOST_TEST(abs(hs - 1) < tol); @@ -360,6 +394,9 @@ void test_complex_hoyer_sparsity() Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); BOOST_TEST(abs(hs - 1) < tol); + hs = boost::math::tools::hoyer_sparsity(v); + BOOST_TEST(abs(hs - 1) < tol); + // Does it work with constant iterators? hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); BOOST_TEST(abs(hs - 1) < tol); @@ -381,6 +418,9 @@ void test_absolute_gini_coefficient() Real gini = boost::math::tools::absolute_gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini - 1) < tol); + gini = boost::math::tools::absolute_gini_coefficient(v); + BOOST_TEST(abs(gini - 1) < tol); + v[0] = 1; v[1] = -1; v[2] = 1; @@ -410,6 +450,9 @@ void test_l0_pseudo_norm() count = boost::math::tools::l0_pseudo_norm(v.cbegin(), v.cend()); BOOST_TEST_EQ(count, 1); + count = boost::math::tools::l0_pseudo_norm(v); + BOOST_TEST_EQ(count, 1); + } template @@ -418,6 +461,10 @@ void test_complex_l0_pseudo_norm() std::vector v{{0,0}, {0,0}, {1,0}}; size_t count = boost::math::tools::l0_pseudo_norm(v.begin(), v.end()); BOOST_TEST_EQ(count, 1); + + count = boost::math::tools::l0_pseudo_norm(v); + BOOST_TEST_EQ(count, 1); + } template @@ -427,6 +474,10 @@ void test_l1_norm() std::vector v{1,1,1}; Real l1 = boost::math::tools::l1_norm(v.begin(), v.end()); BOOST_TEST(abs(l1 - 3) < tol); + + l1 = boost::math::tools::l1_norm(v); + BOOST_TEST(abs(l1 - 3) < tol); + } template @@ -437,6 +488,10 @@ void test_complex_l1_norm() std::vector v{{1,0}, {0,1},{0,-1}}; Real l1 = boost::math::tools::l1_norm(v.begin(), v.end()); BOOST_TEST(abs(l1 - 3) < tol); + + l1 = boost::math::tools::l1_norm(v); + BOOST_TEST(abs(l1 - 3) < tol); + } template @@ -448,6 +503,9 @@ void test_l2_norm() Real l2 = boost::math::tools::l2_norm(v.begin(), v.end()); BOOST_TEST(abs(l2 - 2) < tol); + l2 = boost::math::tools::l2_norm(v); + BOOST_TEST(abs(l2 - 2) < tol); + Real bignum = 4*sqrt(std::numeric_limits::max()); v[0] = bignum; v[1] = 0; @@ -466,6 +524,10 @@ void test_complex_l2_norm() std::vector v{{1,0}, {0,1},{0,-1}, {1,0}}; Real l2 = boost::math::tools::l2_norm(v.begin(), v.end()); BOOST_TEST(abs(l2 - 2) < tol); + + l2 = boost::math::tools::l2_norm(v); + BOOST_TEST(abs(l2 - 2) < tol); + } template @@ -478,6 +540,9 @@ void test_shannon_entropy() Real Hs = boost::math::tools::shannon_entropy(v.begin(), v.end()); Real expected = v.size()*ln_two()/2; BOOST_TEST(abs(Hs - expected) < tol*expected); + + Hs = boost::math::tools::shannon_entropy(v); + BOOST_TEST(abs(Hs - expected) < tol*expected); } From b7dabe272c3d6ae761db8d0d31271e05889f6ff6 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Sat, 8 Dec 2018 12:29:05 -0700 Subject: [PATCH 06/46] Add branches for integral input types where sensible. 
[CI SKIP] --- doc/vector_functionals/vector_functionals.qbk | 36 ++++--- .../boost/math/tools/vector_functionals.hpp | 73 ++++++++++---- test/vector_functionals_test.cpp | 94 +++++++++++++++++++ 3 files changed, 173 insertions(+), 30 deletions(-) diff --git a/doc/vector_functionals/vector_functionals.qbk b/doc/vector_functionals/vector_functionals.qbk index b7394f1999..d62c5a5f7b 100644 --- a/doc/vector_functionals/vector_functionals.qbk +++ b/doc/vector_functionals/vector_functionals.qbk @@ -126,6 +126,7 @@ We now describe each functional in detail. Our examples use `std::vector` to hold the data, but this not required. In general, you can store your data in an Eigen array, and Armadillo vector, `std::array`, and for many of the routines, a `std::forward_list`. These routines are usable float, double, long double, and Boost.Multiprecision precision, as well as their complex extensions whenever the computation is well-defined. +For certain operations (total variation, for example) integer inputs are supported. [heading Mean] @@ -136,7 +137,8 @@ These routines are usable float, double, long double, and Boost.Multiprecision p The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6a]. The data is not modified and must be forward iterable. -Works with complex data. +Works with real, complex and integer data. +If the input is an integer type, the output is a double precision float. [heading Mean and Population Variance] @@ -154,6 +156,7 @@ we are unaware of any one-pass, numerically stable computation of population var If the mean is not required, simply ignore it. The input datatype must be forward iterable and the range `[first, last)` must contain at least two elements. It is /not/ in general sensible to pass complex numbers to this routine. +If integers are passed as input, then the output is a double precision float. [heading Median] @@ -163,8 +166,8 @@ Compute the median of a dataset: double m = boost::math::tools::median(v.begin(), v.end()); /Nota bene: The input vector is modified./ -The calculation of the median is a thin wrapper around the C++11 [@https://en.cppreference.com/w/cpp/algorithm/nth_element nth-element]. -Therefore, all requirements of `nth_element` are inherited by the median calculation. +The calculation of the median is a thin wrapper around the C++11 [@https://en.cppreference.com/w/cpp/algorithm/nth_element `nth_element`]. +Therefore, all requirements of `std::nth_element` are inherited by the median calculation. [heading Absolute Median] @@ -191,8 +194,8 @@ Compute the Gini coefficient of a dataset: /Nota bene:/ Different authors use different conventions regarding the overall scale of the Gini coefficient. We have chosen to follow [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard's definition], which [@https://en.wikipedia.org/wiki/Gini_coefficient Wikipedia] calls a "sample Gini coefficient". -Hurley and Rickard's definition places the Gini coefficient in the range [0,1]; Wikipedia's population Gini coefficient is in the range [0, 1 - 1//n/]. -If you wish to convert the Boost Gini coefficient to the population Gini coefficient, multiply by (/n/-1)//n/. +Hurley and Rickard's definition places the Gini coefficient in the range [0,1]; Wikipedia's population Gini coefficient is in the range [0, 1 - 1/ /n/]. +If you wish to convert the Boost Gini coefficient to the population Gini coefficient, multiply by (/n/-1)/ /n/. /Nota bene:/ There is essentially no reason to pass negative values to the Gini coefficient function. 
However, a single use case (measuring wealth inequality when some people have negative wealth) exists, so we do not throw an exception when negative values are encountered. @@ -231,13 +234,14 @@ For details, see [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard]. Usage: std::vector v{1,0,0}; - Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); + Real hs = boost::math::tools::hoyer_sparsity(v); // hs = 1 std::vector v{1,-1,1}; Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); // hs = 0 The container must be forward iterable and the contents are not modified. +Accepts real, complex, and integer inputs. If the input is an integral type, the output is a double precision float. [heading \u2113[super \u221E] norm] @@ -251,7 +255,7 @@ Computes the supremum norm of a dataset: double sup = boost::math::tools::sup_norm(v.cbegin(), v.cend()); // sup = 8 -Supports both real and complex arithmetic. +Supports real, integral, and complex arithmetic. Container must be forward iterable and is not modified. [heading \u2113[super /p/] norm] @@ -275,9 +279,10 @@ Counts the number of non-zero elements in a container. size_t count = boost::math::tools::l0_pseudo_norm(v.begin(), v.end()); // count = 1 -Supports real and complex numbers. +Supports real, integral, and complex numbers. The container must be forward iterable and the contents are not modified. -Note that this measure is not robust against numerical noise. +Note that this measure is not robust against numerical noise and is therefore not as useful as (say) the Hoyer sparsity in numerical applications. +Works will real, complex, and integral inputs. [heading \u2113[super 1] norm] @@ -287,7 +292,7 @@ The \u2113[super 1] norm is a special case of the \u2113[super /p/] norm, but is double l1 = boost::math::tools::l1_norm(v.begin(), v.end()); // l1 = 3 -Requires a forward iterable input, does not modify input data, and works with complex numbers. +Requires a forward iterable input, does not modify input data, and works with real, integral, and complex numbers. [heading \u2113[super 2] norm] @@ -307,10 +312,12 @@ Requires a forward iterable input, does not modify input data, and works with co v = {0,1}; double tv = boost::math::tools::total_variation(v.begin(), v.end()); // variation is 1, so tv = 1. + std::vector v{1,1,1}; + int tv = boost::math::tools::total_variation(v); -The total variation only supports real numbers. +The total variation only supports real numbers and /signed/ integers. All the constituent operations to compute the total variation are well-defined for complex numbers, -but the computed result is not meaningful. +but the computed result is not meaningful; a 2D total variation is more appropriate. The container must be forward iterable, and the contents are not modified. [heading Shannon Entropy] @@ -329,6 +336,9 @@ The natural logarithm is used to compute the Shannon entropy; all other "Shannon double Ks = boost::math::tools::shannon_cost(v.begin(), v.end()); // Ks = 0; concentration of the vector is minimized. +The Shannon cost is a modified version of the Shannon entropy used in signal processing and data compression. +The useful properties of the Shannon cost are /K/[sub /s/](0) = 0 and /K/[sup /s/](/v/\u2295 /w/) = /K/[sub /s/](v) + /K/[sub /s/](w). +See [@https://doi.org/10.1007/978-3-642-56702-5 Ripples in Mathematics] for details. [heading References] @@ -336,7 +346,7 @@ The natural logarithm is used to compute the Shannon entropy; all other "Shannon * Higham, Nicholas J. 
['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002. * Mallat, Stephane. ['A wavelet tour of signal processing: the sparse way.] Academic press, 2008. * Hurley, Niall, and Scott Rickard. ['Comparing measures of sparsity.] IEEE Transactions on Information Theory 55.10 (2009): 4723-4741. - +* Jensen, Arne, and Anders la Cour-Harbo. ['Ripples in mathematics: the discrete wavelet transform.] Springer Science & Business Media, 2001. [endsect] [/section:vector_functionals Vector Functionals] diff --git a/include/boost/math/tools/vector_functionals.hpp b/include/boost/math/tools/vector_functionals.hpp index e611b12566..e9de60cf2e 100644 --- a/include/boost/math/tools/vector_functionals.hpp +++ b/include/boost/math/tools/vector_functionals.hpp @@ -26,13 +26,26 @@ mean(ForwardIterator first, ForwardIterator last) { typedef typename std::remove_const())>::type>::type Real; BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean."); - Real mu = 0; - Real i = 1; - for(auto it = first; it != last; ++it) { - mu = mu + (*it - mu)/i; - i += 1; + if constexpr (std::is_integral::value) + { + double mu = 0; + double i = 1; + for(auto it = first; it != last; ++it) { + mu = mu + (*it - mu)/i; + i += 1; + } + return mu; + } + else + { + Real mu = 0; + Real i = 1; + for(auto it = first; it != last; ++it) { + mu = mu + (*it - mu)/i; + i += 1; + } + return mu; } - return mu; } template @@ -48,18 +61,35 @@ mean_and_population_variance(ForwardIterator first, ForwardIterator last) typedef typename std::remove_const())>::type>::type Real; BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance."); // Higham, Accuracy and Stability, equation 1.6a and 1.6b: - Real M = *first; - Real Q = 0; - Real k = 2; - for (auto it = first + 1; it != last; ++it) + if constexpr (std::is_integral::value) { - Real tmp = *it - M; - Q = Q + ((k-1)*tmp*tmp)/k; - M = M + tmp/k; - k += 1; + double M = *first; + double Q = 0; + double k = 2; + for (auto it = first + 1; it != last; ++it) + { + double tmp = *it - M; + Q = Q + ((k-1)*tmp*tmp)/k; + M = M + tmp/k; + k += 1; + } + return std::make_pair(M, Q/(k-1)); } + else + { + Real M = *first; + Real Q = 0; + Real k = 2; + for (auto it = first + 1; it != last; ++it) + { + Real tmp = *it - M; + Q = Q + ((k-1)*tmp*tmp)/k; + M = M + tmp/k; + k += 1; + } - return std::make_pair(M, Q/(k-1)); + return std::make_pair(M, Q/(k-1)); + } } template @@ -470,6 +500,7 @@ inline auto absolute_gini_coefficient(RandomAccessContainer & v) template auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) { + typedef typename std::remove_const())>::type>::type RealIntOrComplex; using std::abs; using std::sqrt; BOOST_ASSERT_MSG(first != last, "Computation of the Hoyer sparsity requires at least one sample."); @@ -484,8 +515,16 @@ auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) l2 += tmp*tmp; n += 1; } - decltype(abs(*first)) rootn = sqrt(n); - return (rootn - l1/sqrt(l2) )/ (rootn - 1); + if constexpr (std::is_integral::value) + { + double rootn = sqrt(n); + return (rootn - l1/sqrt(l2) )/ (rootn - 1); + } + else + { + decltype(abs(*first)) rootn = sqrt(n); + return (rootn - l1/sqrt(l2) )/ (rootn - 1); + } } template diff --git a/test/vector_functionals_test.cpp b/test/vector_functionals_test.cpp index 58254be8ff..35afd80053 100644 --- a/test/vector_functionals_test.cpp +++ b/test/vector_functionals_test.cpp @@ -29,6 +29,15 @@ using boost::multiprecision::cpp_complex_50; * 5) Does it 
work with complex data if complex data is sensible? */ +template +void test_integer_mean() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,2,3,4,5}; + double mu = boost::math::tools::mean(v); + BOOST_TEST(abs(mu - 3) < tol); +} + template void test_mean() { @@ -112,6 +121,16 @@ void test_mean_and_population_variance() } +template +void test_integer_mean_and_population_variance() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1,1,1}; + auto [mu, sigma_sq] = boost::math::tools::mean_and_population_variance(v); + BOOST_TEST(abs(mu - 1) < tol); + BOOST_TEST(abs(sigma_sq) < tol); +} + template void test_median() { @@ -278,6 +297,32 @@ void test_complex_lp() } +template +void test_integer_total_variation() +{ + std::vector v{1,1}; + Z tv = boost::math::tools::total_variation(v); + BOOST_TEST_EQ(tv,0); + + v[1] = 2; + tv = boost::math::tools::total_variation(v.begin(), v.end()); + BOOST_TEST_EQ(tv,1); + + v.resize(50); + for (size_t i = 0; i < v.size(); ++i) { + v[i] = i; + } + + tv = boost::math::tools::total_variation(v); + BOOST_TEST_EQ(tv, v.size() -1); + + for (size_t i = 0; i < v.size(); ++i) { + v[i] = i*i; + } + + tv = boost::math::tools::total_variation(v); + BOOST_TEST_EQ(tv, (v.size() -1)*(v.size()-1)); +} template void test_total_variation() @@ -320,7 +365,17 @@ void test_sup_norm() s = boost::math::tools::sup_norm(v); BOOST_TEST(abs(s - 2) < tol); +} + +template +void test_integer_sup_norm() +{ + std::vector v{-2,1,0}; + Z s = boost::math::tools::sup_norm(v.begin(), v.end()); + BOOST_TEST_EQ(s, 2); + s = boost::math::tools::sup_norm(v); + BOOST_TEST_EQ(s,2); } template @@ -384,6 +439,23 @@ void test_hoyer_sparsity() BOOST_TEST(abs(hs) < tol); } +template +void test_integer_hoyer_sparsity() +{ + using std::sqrt; + double tol = 5*std::numeric_limits::epsilon(); + std::vector v{1,0,0}; + double hs = boost::math::tools::hoyer_sparsity(v); + BOOST_TEST(abs(hs - 1) < tol); + + v[0] = 1; + v[1] = 1; + v[2] = 1; + hs = boost::math::tools::hoyer_sparsity(v); + BOOST_TEST(abs(hs) < tol); +} + + template void test_complex_hoyer_sparsity() { @@ -477,7 +549,14 @@ void test_l1_norm() l1 = boost::math::tools::l1_norm(v); BOOST_TEST(abs(l1 - 3) < tol); +} +template +void test_integer_l1_norm() +{ + std::vector v{1,1,1}; + Z l1 = boost::math::tools::l1_norm(v.begin(), v.end()); + BOOST_TEST_EQ(l1, 3); } template @@ -548,6 +627,10 @@ void test_shannon_entropy() int main() { + test_integer_mean(); + test_integer_mean(); + test_integer_mean(); + test_mean(); test_mean(); test_mean(); @@ -561,6 +644,8 @@ int main() test_mean_and_population_variance(); test_mean_and_population_variance(); + test_integer_mean_and_population_variance(); + test_median(); test_median(); test_median(); @@ -591,11 +676,14 @@ int main() test_sup_norm(); test_sup_norm(); + test_integer_sup_norm(); + test_complex_sup_norm>(); test_complex_sup_norm>(); test_complex_sup_norm>(); test_complex_sup_norm(); + test_l0_pseudo_norm(); test_l0_pseudo_norm(); test_l0_pseudo_norm(); test_l0_pseudo_norm(); @@ -611,6 +699,8 @@ int main() test_l1_norm(); test_l1_norm(); + test_integer_l1_norm(); + test_complex_l2_norm>(); test_complex_l2_norm>(); test_complex_l2_norm>(); @@ -631,6 +721,8 @@ int main() test_total_variation(); test_total_variation(); + test_integer_total_variation(); + test_gini_coefficient(); test_gini_coefficient(); test_gini_coefficient(); @@ -646,6 +738,8 @@ int main() test_hoyer_sparsity(); test_hoyer_sparsity(); + test_integer_hoyer_sparsity(); + test_shannon_entropy(); 
test_shannon_entropy(); test_shannon_entropy(); From 5f143244f595c6ea9fc8f5866b5860ba1f6e1e6f Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Sat, 8 Dec 2018 14:09:21 -0700 Subject: [PATCH 07/46] Fully qualify std::nth_element, add tests for std::array. [CI SKIP] --- doc/vector_functionals/vector_functionals.qbk | 7 +-- .../boost/math/tools/vector_functionals.hpp | 12 ++--- test/vector_functionals_test.cpp | 50 ++++++++++++++++++- 3 files changed, 59 insertions(+), 10 deletions(-) diff --git a/doc/vector_functionals/vector_functionals.qbk b/doc/vector_functionals/vector_functionals.qbk index d62c5a5f7b..b5209feda6 100644 --- a/doc/vector_functionals/vector_functionals.qbk +++ b/doc/vector_functionals/vector_functionals.qbk @@ -125,7 +125,7 @@ As a reminder, remember that to actually /get/ vectorization, compile with `-mar We now describe each functional in detail. Our examples use `std::vector` to hold the data, but this not required. In general, you can store your data in an Eigen array, and Armadillo vector, `std::array`, and for many of the routines, a `std::forward_list`. -These routines are usable float, double, long double, and Boost.Multiprecision precision, as well as their complex extensions whenever the computation is well-defined. +These routines are usable in float, double, long double, and Boost.Multiprecision precision, as well as their complex extensions whenever the computation is well-defined. For certain operations (total variation, for example) integer inputs are supported. [heading Mean] @@ -218,7 +218,8 @@ However, for measuring sparsity, the phase of the numbers is irrelevant, so `abs // now abs_gini = 0 Again, Wikipedia denotes our scaling as a "sample Gini coefficient". -We chose this scaling because it always returns unity for a vector which has only one nonzero coefficient. +We chose this scaling because it always returns unity for a vector which has only one nonzero coefficient, +whereas the value of the population Gini coefficient of a vector with one non-zero element is dependent on the length of the input. If sorting the input data is too much expense for a sparsity measure (is it going to be perfect anyway?), consider calculating the Hoyer sparsity instead. @@ -337,7 +338,7 @@ The natural logarithm is used to compute the Shannon entropy; all other "Shannon // Ks = 0; concentration of the vector is minimized. The Shannon cost is a modified version of the Shannon entropy used in signal processing and data compression. -The useful properties of the Shannon cost are /K/[sub /s/](0) = 0 and /K/[sup /s/](/v/\u2295 /w/) = /K/[sub /s/](v) + /K/[sub /s/](w). +The useful properties of the Shannon cost are /K/[sub /s/](0) = 0 and /K/[sub /s/](/v/\u2295 /w/) = /K/[sub /s/](v) + /K/[sub /s/](w). See [@https://doi.org/10.1007/978-3-642-56702-5 Ripples in Mathematics] for details. 
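Concretely, the implementation computes /K/[sub /s/](/v/) = -\u2211 |/v/[sub /i/]|[super 2] ln |/v/[sub /i/]|[super 2], skipping zero entries. A quick numerical check of the additivity property, reading \u2295 as concatenation (the commented values are approximate):

    // Additivity under concatenation: w is v concatenated with itself.
    std::vector<double> v{0.5, 0.5};
    std::vector<double> w{0.5, 0.5, 0.5, 0.5};
    double Kv = boost::math::tools::shannon_cost(v);  // ln(2)   = 0.693...
    double Kw = boost::math::tools::shannon_cost(w);  // 2 ln(2) = 1.386... = Kv + Kv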
diff --git a/include/boost/math/tools/vector_functionals.hpp b/include/boost/math/tools/vector_functionals.hpp index e9de60cf2e..033eb27aa4 100644 --- a/include/boost/math/tools/vector_functionals.hpp +++ b/include/boost/math/tools/vector_functionals.hpp @@ -106,14 +106,14 @@ auto median(RandomAccessIterator first, RandomAccessIterator last) if (num_elems & 1) { auto middle = first + (num_elems - 1)/2; - nth_element(first, middle, last); + std::nth_element(first, middle, last); return *middle; } else { auto middle = first + num_elems/2 - 1; - nth_element(first, middle, last); - nth_element(middle, middle+1, last); + std::nth_element(first, middle, last); + std::nth_element(middle, middle+1, last); return (*middle + *(middle+1))/2; } } @@ -137,14 +137,14 @@ auto absolute_median(RandomAccessIterator first, RandomAccessIterator last) if (num_elems & 1) { auto middle = first + (num_elems - 1)/2; - nth_element(first, middle, last, comparator); + std::nth_element(first, middle, last, comparator); return abs(*middle); } else { auto middle = first + num_elems/2 - 1; - nth_element(first, middle, last, comparator); - nth_element(middle, middle+1, last, comparator); + std::nth_element(first, middle, last, comparator); + std::nth_element(middle, middle+1, last, comparator); return (abs(*middle) + abs(*(middle+1)))/abs(static_cast(2)); } } diff --git a/test/vector_functionals_test.cpp b/test/vector_functionals_test.cpp index 35afd80053..187d4865a6 100644 --- a/test/vector_functionals_test.cpp +++ b/test/vector_functionals_test.cpp @@ -36,6 +36,11 @@ void test_integer_mean() std::vector v{1,2,3,4,5}; double mu = boost::math::tools::mean(v); BOOST_TEST(abs(mu - 3) < tol); + + // Work with std::array? + std::array w{1,2,3,4,5}; + mu = boost::math::tools::mean(v); + BOOST_TEST(abs(mu - 3) < tol); } template @@ -50,7 +55,6 @@ void test_mean() mu = boost::math::tools::mean(v); BOOST_TEST(abs(mu - 3) < tol); - // Can we successfully average only part of the vector? mu = boost::math::tools::mean(v.begin(), v.begin() + 3); BOOST_TEST(abs(mu - 2) < tol); @@ -174,6 +178,11 @@ void test_median() std::shuffle(v.begin(), v.end(), g); m = boost::math::tools::median(v.begin(), v.end()); BOOST_TEST_EQ(m, 2); + + // Does it work with std::array? + std::array w{1,2,3}; + m = boost::math::tools::median(w); + BOOST_TEST_EQ(m, 2); } template @@ -218,6 +227,10 @@ void test_absolute_median() std::shuffle(v.begin(), v.end(), g); m = boost::math::tools::absolute_median(v.begin(), v.end()); BOOST_TEST_EQ(m, 2); + + std::array w{1, 2, -3}; + m = boost::math::tools::absolute_median(w); + BOOST_TEST_EQ(m, 2); } @@ -322,6 +335,11 @@ void test_integer_total_variation() tv = boost::math::tools::total_variation(v); BOOST_TEST_EQ(tv, (v.size() -1)*(v.size()-1)); + + // Work with std::array? + std::array w{1,1}; + tv = boost::math::tools::total_variation(w); + BOOST_TEST_EQ(tv,0); } template @@ -365,6 +383,12 @@ void test_sup_norm() s = boost::math::tools::sup_norm(v); BOOST_TEST(abs(s - 2) < tol); + + // Work with std::array? 
+ std::array w{-2,1,0}; + s = boost::math::tools::sup_norm(w); + BOOST_TEST(abs(s - 2) < tol); + } template @@ -414,6 +438,10 @@ void test_gini_coefficient() v[2] = 0; gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini) < tol); + + std::array w{0,0,0}; + gini = boost::math::tools::gini_coefficient(w); + BOOST_TEST(abs(gini) < tol); } template @@ -437,6 +465,10 @@ void test_hoyer_sparsity() v[2] = 1; hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); BOOST_TEST(abs(hs) < tol); + + std::array w{1,1,1}; + hs = boost::math::tools::hoyer_sparsity(w); + BOOST_TEST(abs(hs) < tol); } template @@ -525,6 +557,9 @@ void test_l0_pseudo_norm() count = boost::math::tools::l0_pseudo_norm(v); BOOST_TEST_EQ(count, 1); + std::array w{0,0,1}; + count = boost::math::tools::l0_pseudo_norm(w); + BOOST_TEST_EQ(count, 1); } template @@ -549,6 +584,10 @@ void test_l1_norm() l1 = boost::math::tools::l1_norm(v); BOOST_TEST(abs(l1 - 3) < tol); + + std::array w{1,1,1}; + l1 = boost::math::tools::l1_norm(w); + BOOST_TEST(abs(l1 - 3) < tol); } template @@ -585,6 +624,10 @@ void test_l2_norm() l2 = boost::math::tools::l2_norm(v); BOOST_TEST(abs(l2 - 2) < tol); + std::array w{1,1,1,1}; + l2 = boost::math::tools::l2_norm(w); + BOOST_TEST(abs(l2 - 2) < tol); + Real bignum = 4*sqrt(std::numeric_limits::max()); v[0] = bignum; v[1] = 0; @@ -622,6 +665,11 @@ void test_shannon_entropy() Hs = boost::math::tools::shannon_entropy(v); BOOST_TEST(abs(Hs - expected) < tol*expected); + + std::array w{half(), half(), half()}; + Hs = boost::math::tools::shannon_entropy(w); + expected = 3*ln_two()/2; + BOOST_TEST(abs(Hs - expected) < tol*expected); } From 38dd139b0a65044c914cfa0fefe21d779371a843 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Sat, 8 Dec 2018 20:11:38 -0700 Subject: [PATCH 08/46] Get rid of grotesque typedefs and change to using declaration. 
[CI SKIP] --- .../boost/math/tools/vector_functionals.hpp | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/include/boost/math/tools/vector_functionals.hpp b/include/boost/math/tools/vector_functionals.hpp index 033eb27aa4..7c3edbc0ba 100644 --- a/include/boost/math/tools/vector_functionals.hpp +++ b/include/boost/math/tools/vector_functionals.hpp @@ -24,7 +24,7 @@ template auto mean(ForwardIterator first, ForwardIterator last) { - typedef typename std::remove_const())>::type>::type Real; + using Real = typename std::iterator_traits::value_type; BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean."); if constexpr (std::is_integral::value) { @@ -58,7 +58,7 @@ template auto mean_and_population_variance(ForwardIterator first, ForwardIterator last) { - typedef typename std::remove_const())>::type>::type Real; + using Real = typename std::iterator_traits::value_type; BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance."); // Higham, Accuracy and Stability, equation 1.6a and 1.6b: if constexpr (std::is_integral::value) @@ -130,7 +130,7 @@ template auto absolute_median(RandomAccessIterator first, RandomAccessIterator last) { using std::abs; - typedef typename std::remove_const())>::type>::type RealOrComplex; + using RealOrComplex = typename std::iterator_traits::value_type; size_t num_elems = std::distance(first, last); BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined."); auto comparator = [](RealOrComplex a, RealOrComplex b) { return abs(a) < abs(b);}; @@ -159,8 +159,8 @@ inline auto absolute_median(RandomAccessContainer & v) template auto total_variation(ForwardIterator first, ForwardIterator last) { + using Real = typename std::iterator_traits::value_type; using std::abs; - typedef typename std::remove_const())>::type>::type Real; BOOST_ASSERT_MSG(first != last && std::next(first) != last, "At least two samples are required to compute the total variation."); Real tv = 0; auto it = first; @@ -183,7 +183,7 @@ inline auto total_variation(Container const & v) template auto shannon_entropy(ForwardIterator first, ForwardIterator last) { - typedef typename std::remove_const())>::type>::type Real; + using Real = typename std::iterator_traits::value_type; using std::log; Real entropy = 0; for (auto it = first; it != last; ++it) @@ -205,7 +205,7 @@ inline auto shannon_entropy(Container const & v) template auto shannon_cost(ForwardIterator first, ForwardIterator last) { - typedef typename std::remove_const())>::type>::type Real; + using Real = typename std::iterator_traits::value_type; using std::log; Real cost = 0; for (auto it = first; it != last; ++it) @@ -230,7 +230,7 @@ template auto sup_norm(ForwardIterator first, ForwardIterator last) { BOOST_ASSERT_MSG(first != last, "At least one value is required to compute the sup norm."); - typedef typename std::remove_const())>::type>::type RealOrComplex; + using RealOrComplex = typename std::iterator_traits::value_type; using std::abs; if constexpr (boost::is_complex::value || boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) @@ -280,7 +280,7 @@ inline auto l1_norm(Container const & v) template auto l2_norm(ForwardIterator first, ForwardIterator last) { - typedef typename std::remove_const())>::type>::type RealOrComplex; + using RealOrComplex = typename std::iterator_traits::value_type; using std::abs; using std::norm; using std::sqrt; @@ -340,7 +340,7 @@ inline auto 
l2_norm(Container const & v) template size_t l0_pseudo_norm(ForwardIterator first, ForwardIterator last) { - typedef typename std::remove_const())>::type>::type RealOrComplex; + using RealOrComplex = typename std::iterator_traits::value_type; size_t count = 0; for (auto it = first; it != last; ++it) { @@ -364,7 +364,7 @@ auto lp_norm(ForwardIterator first, ForwardIterator last, typename std::remove_c using std::pow; using std::is_floating_point; using std::isfinite; - typedef typename std::remove_const())>::type>::type RealOrComplex; + using RealOrComplex = typename std::iterator_traits::value_type; if constexpr (boost::is_complex::value || boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) { @@ -430,7 +430,7 @@ inline auto lp_norm(Container const & v, typename Container::value_type p) template auto gini_coefficient(ForwardIterator first, ForwardIterator last) { - typedef typename std::remove_const())>::type>::type Real; + using Real = typename std::iterator_traits::value_type; BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); std::sort(first, last); @@ -438,7 +438,8 @@ auto gini_coefficient(ForwardIterator first, ForwardIterator last) Real i = 1; Real num = 0; Real denom = 0; - for (auto it = first; it != last; ++it) { + for (auto it = first; it != last; ++it) + { num += *it*i; denom += *it; ++i; @@ -463,7 +464,7 @@ inline auto gini_coefficient(RandomAccessContainer & v) template auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) { - typedef typename std::remove_const())>::type>::type RealOrComplex; + using RealOrComplex = typename std::iterator_traits::value_type; BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); std::sort(first, last, [](RealOrComplex a, RealOrComplex b) { return abs(b) > abs(a); }); @@ -500,7 +501,7 @@ inline auto absolute_gini_coefficient(RandomAccessContainer & v) template auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) { - typedef typename std::remove_const())>::type>::type RealIntOrComplex; + using RealIntOrComplex = typename std::iterator_traits::value_type; using std::abs; using std::sqrt; BOOST_ASSERT_MSG(first != last, "Computation of the Hoyer sparsity requires at least one sample."); From 1a246db7912fa33ae0ff57e43d2dd7a00c84d2c7 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Sat, 8 Dec 2018 20:53:14 -0700 Subject: [PATCH 09/46] Change from vector_functionals.hpp to descriptive_statistics.hpp and norms.hpp. 
--- doc/math.qbk | 3 +- ...tionals.qbk => descriptive_statistics.qbk} | 126 +----- doc/vector_functionals/norms.qbk | 151 ++++++++ ...tionals.hpp => descriptive_statistics.hpp} | 231 +---------- include/boost/math/tools/norms.hpp | 248 ++++++++++++ test/Jamfile.v2 | 3 +- ...st.cpp => descriptive_statistics_test.cpp} | 330 +--------------- test/norms_test.cpp | 361 ++++++++++++++++++ 8 files changed, 774 insertions(+), 679 deletions(-) rename doc/vector_functionals/{vector_functionals.qbk => descriptive_statistics.qbk} (69%) create mode 100644 doc/vector_functionals/norms.qbk rename include/boost/math/tools/{vector_functionals.hpp => descriptive_statistics.hpp} (55%) create mode 100644 include/boost/math/tools/norms.hpp rename test/{vector_functionals_test.cpp => descriptive_statistics_test.cpp} (60%) create mode 100644 test/norms_test.cpp diff --git a/doc/math.qbk b/doc/math.qbk index 90585886f3..c5f30c6f35 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -553,7 +553,8 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22. [endmathpart] [/section:dist Statistical Distributions and Functions] [mathpart vector_functionals Vector Functionals] -[include vector_functionals/vector_functionals.qbk] +[include vector_functionals/descriptive_statistics.qbk] +[include vector_functionals/norms.qbk] [endmathpart] [/section:vector_functionals Vector Functionals] [mathpart special Special Functions] diff --git a/doc/vector_functionals/vector_functionals.qbk b/doc/vector_functionals/descriptive_statistics.qbk similarity index 69% rename from doc/vector_functionals/vector_functionals.qbk rename to doc/vector_functionals/descriptive_statistics.qbk index b5209feda6..1cc307b3c9 100644 --- a/doc/vector_functionals/vector_functionals.qbk +++ b/doc/vector_functionals/descriptive_statistics.qbk @@ -6,12 +6,12 @@ http://www.boost.org/LICENSE_1_0.txt). ] -[section:vector_functionals Vector Functionals] +[section:descriptive_statistics Descriptive Statistics] [heading Synopsis] `` -#include +#include namespace boost{ namespace math{ namespace tools { @@ -69,57 +69,19 @@ namespace boost{ namespace math{ namespace tools { template auto shannon_cost(ForwardIterator first, ForwardIterator last); - template - auto l0_pseudo_norm(Container const & c); - - template - auto l0_pseudo_norm(ForwardIterator first, ForwardIterator last); - - template - auto l1_norm(Container const & c); - - template - auto l1_norm(ForwardIterator first, ForwardIterator last); - - template - auto l2_norm(Container const & c); - - template - auto l2_norm(ForwardIterator first, ForwardIterator last); - - template - auto sup_norm(Container const & c); - - template - auto sup_norm(ForwardIterator first, ForwardIterator last); - - template - auto lp_norm(Container const & c); - - template - auto lp_norm(ForwardIterator first, ForwardIterator last, p); - - template - auto total_variation(Container const & c); - - template - auto total_variation(ForwardIterator first, ForwardIterator last); }}} `` [heading Description] -The file `boost/math/tools/vector_functionals.hpp` is a set of facilities for computing scalar values from vectors. -We use the word "vector functional" in the [@https://ncatlab.org/nlab/show/nonlinear+functional mathematical sense], indicating a map \u2113:\u211D[super n] \u2192 \u211D, -and occasionally maps from \u2102[super n] \u2192 \u211D and \u2102[super n] \u2192 \u2102. 
-The set of maps provided herein attempt to cover the most commonly encountered functionals from statistics, numerical analysis, and signal processing. +The file `boost/math/tools/descriptive_statistics.hpp` is a set of facilities for computing scalar values from vectors. Many of these functionals have trivial naive implementations, but experienced programmers will recognize that even trivial algorithms are easy to screw up, and that numerical instabilities often lurk in corner cases. We have attempted to do our "due diligence" to root out these problems-scouring the literature for numerically stable algorithms for even the simplest of functionals. /Nota bene/: Some similar functionality is provided in [@https://www.boost.org/doc/libs/1_68_0/doc/html/accumulators/user_s_guide.html Boost Accumulators Framework]. -These accumulators should be used in real-time applications; `vector_functionals.hpp` should be used when CPU vectorization is needed. +These accumulators should be used in real-time applications; `descriptive_statistics.hpp` should be used when CPU vectorization is needed. As a reminder, remember that to actually /get/ vectorization, compile with `-march=native -O3` flags. We now describe each functional in detail. @@ -244,83 +206,6 @@ Usage: The container must be forward iterable and the contents are not modified. Accepts real, complex, and integer inputs. If the input is an integral type, the output is a double precision float. -[heading \u2113[super \u221E] norm] - -Computes the supremum norm of a dataset: - - std::vector v{-3, 2, 1}; - double sup = boost::math::tools::sup_norm(v.cbegin(), v.cend()); - // sup = 3 - - std::vector> v{{0, -8}, {1,1}, {-3,2}}; - double sup = boost::math::tools::sup_norm(v.cbegin(), v.cend()); - // sup = 8 - -Supports real, integral, and complex arithmetic. -Container must be forward iterable and is not modified. - -[heading \u2113[super /p/] norm] - - std::vector v{-8, 0, 0}; - double sup = boost::math::tools::lp_norm(v.cbegin(), v.cend(), 7); - // sup = 8 - - std::vector> v{{1, 0}, {0,1}, {0,-1}}; - double sup = boost::math::tools::sup_norm(v.cbegin(), v.cend(), 3); - // sup = cbrt(3) - -Supports both real and complex arithmetic. -The container must be forward iterable and the contents are not modified. - -[heading \u2113[super 0] pseudo-norm] - -Counts the number of non-zero elements in a container. - - std::vector v{0,0,1}; - size_t count = boost::math::tools::l0_pseudo_norm(v.begin(), v.end()); - // count = 1 - -Supports real, integral, and complex numbers. -The container must be forward iterable and the contents are not modified. -Note that this measure is not robust against numerical noise and is therefore not as useful as (say) the Hoyer sparsity in numerical applications. -Works will real, complex, and integral inputs. - -[heading \u2113[super 1] norm] - -The \u2113[super 1] norm is a special case of the \u2113[super /p/] norm, but is much faster: - - std::vector v{1,1,1}; - double l1 = boost::math::tools::l1_norm(v.begin(), v.end()); - // l1 = 3 - -Requires a forward iterable input, does not modify input data, and works with real, integral, and complex numbers. - -[heading \u2113[super 2] norm] - -The \u2113[super 2] norm is again a special case of the \u2113[super /p/] norm, but is much faster: - - std::vector v{1,1,1}; - double l1 = boost::math::tools::l2_norm(v.begin(), v.end()); - // l1 = sqrt(3) - -Requires a forward iterable input, does not modify input data, and works with complex numbers. 
- -[heading Total Variation] - - std::vector v{1,1,1}; - double tv = boost::math::tools::total_variation(v.begin(), v.end()); - // no variation in v, so tv = 0. - v = {0,1}; - double tv = boost::math::tools::total_variation(v.begin(), v.end()); - // variation is 1, so tv = 1. - std::vector v{1,1,1}; - int tv = boost::math::tools::total_variation(v); - -The total variation only supports real numbers and /signed/ integers. -All the constituent operations to compute the total variation are well-defined for complex numbers, -but the computed result is not meaningful; a 2D total variation is more appropriate. -The container must be forward iterable, and the contents are not modified. - [heading Shannon Entropy] std::vector v{1/2.0, 1/2.0}; @@ -330,7 +215,6 @@ The container must be forward iterable, and the contents are not modified. The Shannon entropy only supports non-negative real-valued inputs, presumably for interpretational purposes in the range [0,1]-though this is not enforced. The natural logarithm is used to compute the Shannon entropy; all other "Shannon entropies" are readily obtained by change of log base. - [heading Shannon Cost] std::vector v{-1, 1,-1}; @@ -350,4 +234,4 @@ See [@https://doi.org/10.1007/978-3-642-56702-5 Ripples in Mathematics] for deta * Jensen, Arne, and Anders la Cour-Harbo. ['Ripples in mathematics: the discrete wavelet transform.] Springer Science & Business Media, 2001. [endsect] -[/section:vector_functionals Vector Functionals] +[/section:descriptive_statistics Descriptive Statistics] diff --git a/doc/vector_functionals/norms.qbk b/doc/vector_functionals/norms.qbk new file mode 100644 index 0000000000..221980c545 --- /dev/null +++ b/doc/vector_functionals/norms.qbk @@ -0,0 +1,151 @@ +[/ + Copyright 2017 Nick Thompson + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt). +] + +[section:norms Norms] + +[heading Synopsis] + +`` +#include + +namespace boost{ namespace math{ namespace tools { + + template + auto l0_pseudo_norm(Container const & c); + + template + auto l0_pseudo_norm(ForwardIterator first, ForwardIterator last); + + template + auto l1_norm(Container const & c); + + template + auto l1_norm(ForwardIterator first, ForwardIterator last); + + template + auto l2_norm(Container const & c); + + template + auto l2_norm(ForwardIterator first, ForwardIterator last); + + template + auto sup_norm(Container const & c); + + template + auto sup_norm(ForwardIterator first, ForwardIterator last); + + template + auto lp_norm(Container const & c); + + template + auto lp_norm(ForwardIterator first, ForwardIterator last, p); + + template + auto total_variation(Container const & c); + + template + auto total_variation(ForwardIterator first, ForwardIterator last); + +}}} +`` + +[heading Description] + +The file `boost/math/tools/norms.hpp` is a set of facilities for computing scalar values traditionally useful in numerical analysis from vectors. + +Our examples use `std::vector` to hold the data, but this not required. +In general, you can store your data in an Eigen array, and Armadillo vector, `std::array`, and for many of the routines, a `std::forward_list`. +These routines are usable in float, double, long double, and Boost.Multiprecision precision, as well as their complex extensions whenever the computation is well-defined. +For certain operations (total variation, for example) integer inputs are supported. 
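As a usage sketch (assuming the boost/math/tools/norms.hpp header and the C++17 toolchain this patch targets), the iterator interface means the same call works on containers that only expose forward iteration:

    #include <array>
    #include <cstddef>
    #include <forward_list>
    #include <iostream>
    #include <boost/math/tools/norms.hpp>

    int main()
    {
        std::array<double, 3> a{-3.0, 2.0, 1.0};
        std::forward_list<double> fl{-3.0, 2.0, 1.0};

        // Container and iterator overloads compute the same value.
        double s1 = boost::math::tools::sup_norm(a);
        double s2 = boost::math::tools::sup_norm(fl.cbegin(), fl.cend());
        std::cout << s1 << " " << s2 << "\n"; // both print 3

        // l0_pseudo_norm returns a count, so it is a size_t rather than a double.
        std::size_t nonzeros = boost::math::tools::l0_pseudo_norm(a);
        std::cout << nonzeros << "\n"; // 3
    }
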
+ +[heading \u2113[super \u221E] norm] + +Computes the supremum norm of a dataset: + + std::vector v{-3, 2, 1}; + double sup = boost::math::tools::sup_norm(v.cbegin(), v.cend()); + // sup = 3 + + std::vector> v{{0, -8}, {1,1}, {-3,2}}; + double sup = boost::math::tools::sup_norm(v.cbegin(), v.cend()); + // sup = 8 + +Supports real, integral, and complex arithmetic. +Container must be forward iterable and is not modified. + +[heading \u2113[super /p/] norm] + + std::vector v{-8, 0, 0}; + double sup = boost::math::tools::lp_norm(v.cbegin(), v.cend(), 7); + // sup = 8 + + std::vector> v{{1, 0}, {0,1}, {0,-1}}; + double sup = boost::math::tools::sup_norm(v.cbegin(), v.cend(), 3); + // sup = cbrt(3) + +Supports both real and complex arithmetic. +The container must be forward iterable and the contents are not modified. + +[heading \u2113[super 0] pseudo-norm] + +Counts the number of non-zero elements in a container. + + std::vector v{0,0,1}; + size_t count = boost::math::tools::l0_pseudo_norm(v.begin(), v.end()); + // count = 1 + +Supports real, integral, and complex numbers. +The container must be forward iterable and the contents are not modified. +Note that this measure is not robust against numerical noise and is therefore not as useful as (say) the Hoyer sparsity in numerical applications. +Works will real, complex, and integral inputs. + +[heading \u2113[super 1] norm] + +The \u2113[super 1] norm is a special case of the \u2113[super /p/] norm, but is much faster: + + std::vector v{1,1,1}; + double l1 = boost::math::tools::l1_norm(v.begin(), v.end()); + // l1 = 3 + +Requires a forward iterable input, does not modify input data, and works with real, integral, and complex numbers. + +[heading \u2113[super 2] norm] + +The \u2113[super 2] norm is again a special case of the \u2113[super /p/] norm, but is much faster: + + std::vector v{1,1,1}; + double l1 = boost::math::tools::l2_norm(v.begin(), v.end()); + // l1 = sqrt(3) + +Requires a forward iterable input, does not modify input data, and works with complex numbers. + +[heading Total Variation] + + std::vector v{1,1,1}; + double tv = boost::math::tools::total_variation(v.begin(), v.end()); + // no variation in v, so tv = 0. + v = {0,1}; + double tv = boost::math::tools::total_variation(v.begin(), v.end()); + // variation is 1, so tv = 1. + std::vector v{1,1,1}; + int tv = boost::math::tools::total_variation(v); + +The total variation only supports real numbers and /signed/ integers. +All the constituent operations to compute the total variation are well-defined for complex numbers, +but the computed result is not meaningful; a 2D total variation is more appropriate. +The container must be forward iterable, and the contents are not modified. + +[heading References] + +* Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002. +* Mallat, Stephane. ['A wavelet tour of signal processing: the sparse way.] Academic press, 2008. +* Hurley, Niall, and Scott Rickard. ['Comparing measures of sparsity.] IEEE Transactions on Information Theory 55.10 (2009): 4723-4741. +* Jensen, Arne, and Anders la Cour-Harbo. ['Ripples in mathematics: the discrete wavelet transform.] Springer Science & Business Media, 2001. 
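The l2_norm implementation added later in this patch guards the intermediate sum of squares against overflow by rescaling with the sup norm whenever the naive result is not finite. A self-contained sketch of that rescaling idea (the function name l2_rescaled is hypothetical; this is not the library source):

    #include <algorithm>
    #include <cmath>
    #include <limits>
    #include <vector>

    // Sketch: scale by the largest magnitude so every term is <= 1 before squaring.
    double l2_rescaled(const std::vector<double>& v)
    {
        double a = 0;
        for (double x : v) { a = std::max(a, std::abs(x)); }
        if (a == 0) { return 0; }
        double sum = 0;
        for (double x : v)
        {
            double t = x / a;
            sum += t * t;
        }
        return a * std::sqrt(sum);
    }

    int main()
    {
        double big = 4 * std::sqrt(std::numeric_limits<double>::max());
        std::vector<double> v{big, 0, 0, 0};
        // The naive sum of squares overflows to inf; the rescaled form returns big exactly.
        double l2 = l2_rescaled(v);
        return l2 == big ? 0 : 1;
    }
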
+ +[endsect] +[/section:norms Norms] diff --git a/include/boost/math/tools/vector_functionals.hpp b/include/boost/math/tools/descriptive_statistics.hpp similarity index 55% rename from include/boost/math/tools/vector_functionals.hpp rename to include/boost/math/tools/descriptive_statistics.hpp index 7c3edbc0ba..4e625e5c59 100644 --- a/include/boost/math/tools/vector_functionals.hpp +++ b/include/boost/math/tools/descriptive_statistics.hpp @@ -3,12 +3,11 @@ // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef BOOST_MATH_TOOLS_VECTOR_FUNCTIONALS_HPP -#define BOOST_MATH_TOOLS_VECTOR_FUNCTIONALS_HPP +#ifndef BOOST_MATH_TOOLS_DESCRIPTIVE_STATISTICS_HPP +#define BOOST_MATH_TOOLS_DESCRIPTIVE_STATISTICS_HPP #include #include -#include #include #include #include @@ -155,30 +154,6 @@ inline auto absolute_median(RandomAccessContainer & v) return absolute_median(v.begin(), v.end()); } -// Mallat, "A Wavelet Tour of Signal Processing", equation 2.60: -template -auto total_variation(ForwardIterator first, ForwardIterator last) -{ - using Real = typename std::iterator_traits::value_type; - using std::abs; - BOOST_ASSERT_MSG(first != last && std::next(first) != last, "At least two samples are required to compute the total variation."); - Real tv = 0; - auto it = first; - Real tmp = *it; - while (++it != last) - { - tv += abs(*it - tmp); - tmp = *it; - } - return tv; -} - -template -inline auto total_variation(Container const & v) -{ - return total_variation(v.cbegin(), v.cend()); -} - template auto shannon_entropy(ForwardIterator first, ForwardIterator last) @@ -202,6 +177,7 @@ inline auto shannon_entropy(Container const & v) return shannon_entropy(v.cbegin(), v.cend()); } + template auto shannon_cost(ForwardIterator first, ForwardIterator last) { @@ -226,207 +202,6 @@ inline auto shannon_cost(Container const & v) } -template -auto sup_norm(ForwardIterator first, ForwardIterator last) -{ - BOOST_ASSERT_MSG(first != last, "At least one value is required to compute the sup norm."); - using RealOrComplex = typename std::iterator_traits::value_type; - using std::abs; - if constexpr (boost::is_complex::value || - boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) - { - auto it = std::max_element(first, last, [](RealOrComplex a, RealOrComplex b) { return abs(b) > abs(a); }); - return abs(*it); - } - else - { - auto pair = std::minmax_element(first, last); - if (abs(*pair.first) > abs(*pair.second)) - { - return abs(*pair.first); - } - else - { - return abs(*pair.second); - } - } -} - -template -inline auto sup_norm(Container const & v) -{ - return sup_norm(v.cbegin(), v.cend()); -} - -template -auto l1_norm(ForwardIterator first, ForwardIterator last) -{ - using std::abs; - decltype(abs(*first)) l1 = 0; - for (auto it = first; it != last; ++it) - { - l1 += abs(*first); - } - return l1; -} - -template -inline auto l1_norm(Container const & v) -{ - return l1_norm(v.cbegin(), v.cend()); -} - - -template -auto l2_norm(ForwardIterator first, ForwardIterator last) -{ - using RealOrComplex = typename std::iterator_traits::value_type; - using std::abs; - using std::norm; - using std::sqrt; - using std::is_floating_point; - if constexpr (boost::is_complex::value || - boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) - { - typedef typename RealOrComplex::value_type Real; - Real l2 = 0; - for (auto it = first; it != last; ++it) - { - l2 += norm(*it); - 
} - Real result = sqrt(l2); - if (!isfinite(result)) - { - Real a = sup_norm(first, last); - l2 = 0; - for (auto it = first; it != last; ++it) - { - l2 += norm(*it/a); - } - return a*sqrt(l2); - } - return result; - } - else if constexpr (is_floating_point::value || - boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) - { - RealOrComplex l2 = 0; - for (auto it = first; it != last; ++it) - { - l2 += (*it)*(*it); - } - RealOrComplex result = sqrt(l2); - if (!isfinite(result)) - { - RealOrComplex a = sup_norm(first, last); - l2 = 0; - for (auto it = first; it != last; ++it) - { - RealOrComplex tmp = *it/a; - l2 += tmp*tmp; - } - return a*sqrt(l2); - } - return result; - } -} - -template -inline auto l2_norm(Container const & v) -{ - return l2_norm(v.cbegin(), v.cend()); -} - -template -size_t l0_pseudo_norm(ForwardIterator first, ForwardIterator last) -{ - using RealOrComplex = typename std::iterator_traits::value_type; - size_t count = 0; - for (auto it = first; it != last; ++it) - { - if (*it != RealOrComplex(0)) - { - ++count; - } - } - return count; -} - -template -inline size_t l0_pseudo_norm(Container const & v) -{ - return l0_pseudo_norm(v.cbegin(), v.cend()); -} - -template -auto lp_norm(ForwardIterator first, ForwardIterator last, typename std::remove_const())>::type>::type p) -{ - using std::pow; - using std::is_floating_point; - using std::isfinite; - using RealOrComplex = typename std::iterator_traits::value_type; - if constexpr (boost::is_complex::value || - boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) - { - BOOST_ASSERT_MSG(p.real() >= 0, "For p < 0, the lp norm is not a norm."); - BOOST_ASSERT_MSG(p.imag() == 0, "For imaginary p, the lp norm is not a norm."); - using std::norm; - decltype(p.real()) lp = 0; - for (auto it = first; it != last; ++it) - { - lp += pow(norm(*it), p.real()/2); - } - - auto result = pow(lp, 1/p.real()); - if (!isfinite(result)) - { - auto a = boost::math::tools::sup_norm(first, last); - decltype(p.real()) lp = 0; - for (auto it = first; it != last; ++it) - { - lp += pow(abs(*it)/a, p.real()); - } - result = a*pow(lp, 1/p.real()); - } - return result; - } - else if constexpr (is_floating_point::value || - boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) - { - BOOST_ASSERT_MSG(p >= 0, "For p < 0, the lp norm is not a norm"); - RealOrComplex lp = 0; - - for (auto it = first; it != last; ++it) - { - lp += pow(abs(*it), p); - } - - RealOrComplex result = pow(lp, 1/p); - if (!isfinite(result)) - { - RealOrComplex a = boost::math::tools::sup_norm(first, last); - lp = 0; - for (auto it = first; it != last; ++it) - { - lp += pow(abs(*it)/a, p); - } - result = a*pow(lp, 1/p); - } - return result; - } - else - { - BOOST_ASSERT_MSG(false, "Unable to determine if the input type is real or complex."); - } -} - -template -inline auto lp_norm(Container const & v, typename Container::value_type p) -{ - return lp_norm(v.cbegin(), v.cend(), p); -} - - template auto gini_coefficient(ForwardIterator first, ForwardIterator last) { diff --git a/include/boost/math/tools/norms.hpp b/include/boost/math/tools/norms.hpp new file mode 100644 index 0000000000..8563b15704 --- /dev/null +++ b/include/boost/math/tools/norms.hpp @@ -0,0 +1,248 @@ +// (C) Copyright Nick Thompson 2018. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_NORMS_HPP +#define BOOST_MATH_TOOLS_NORMS_HPP + +#include +#include +#include +#include +#include + +/* + * A set of tools for computing scalar quantities associated with lists of numbers. + */ + + +namespace boost{ namespace math{ namespace tools { + +// Mallat, "A Wavelet Tour of Signal Processing", equation 2.60: +template +auto total_variation(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + using std::abs; + BOOST_ASSERT_MSG(first != last && std::next(first) != last, "At least two samples are required to compute the total variation."); + Real tv = 0; + auto it = first; + Real tmp = *it; + while (++it != last) + { + tv += abs(*it - tmp); + tmp = *it; + } + return tv; +} + +template +inline auto total_variation(Container const & v) +{ + return total_variation(v.cbegin(), v.cend()); +} + + +template +auto sup_norm(ForwardIterator first, ForwardIterator last) +{ + BOOST_ASSERT_MSG(first != last, "At least one value is required to compute the sup norm."); + using RealOrComplex = typename std::iterator_traits::value_type; + using std::abs; + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + auto it = std::max_element(first, last, [](RealOrComplex a, RealOrComplex b) { return abs(b) > abs(a); }); + return abs(*it); + } + else + { + auto pair = std::minmax_element(first, last); + if (abs(*pair.first) > abs(*pair.second)) + { + return abs(*pair.first); + } + else + { + return abs(*pair.second); + } + } +} + +template +inline auto sup_norm(Container const & v) +{ + return sup_norm(v.cbegin(), v.cend()); +} + +template +auto l1_norm(ForwardIterator first, ForwardIterator last) +{ + using std::abs; + decltype(abs(*first)) l1 = 0; + for (auto it = first; it != last; ++it) + { + l1 += abs(*first); + } + return l1; +} + +template +inline auto l1_norm(Container const & v) +{ + return l1_norm(v.cbegin(), v.cend()); +} + + +template +auto l2_norm(ForwardIterator first, ForwardIterator last) +{ + using RealOrComplex = typename std::iterator_traits::value_type; + using std::abs; + using std::norm; + using std::sqrt; + using std::is_floating_point; + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + typedef typename RealOrComplex::value_type Real; + Real l2 = 0; + for (auto it = first; it != last; ++it) + { + l2 += norm(*it); + } + Real result = sqrt(l2); + if (!isfinite(result)) + { + Real a = sup_norm(first, last); + l2 = 0; + for (auto it = first; it != last; ++it) + { + l2 += norm(*it/a); + } + return a*sqrt(l2); + } + return result; + } + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + RealOrComplex l2 = 0; + for (auto it = first; it != last; ++it) + { + l2 += (*it)*(*it); + } + RealOrComplex result = sqrt(l2); + if (!isfinite(result)) + { + RealOrComplex a = sup_norm(first, last); + l2 = 0; + for (auto it = first; it != last; ++it) + { + RealOrComplex tmp = *it/a; + l2 += tmp*tmp; + } + return a*sqrt(l2); + } + return result; + } +} + +template +inline auto l2_norm(Container const & v) +{ + return l2_norm(v.cbegin(), v.cend()); +} + +template +size_t l0_pseudo_norm(ForwardIterator first, ForwardIterator last) +{ + using RealOrComplex = 
typename std::iterator_traits::value_type; + size_t count = 0; + for (auto it = first; it != last; ++it) + { + if (*it != RealOrComplex(0)) + { + ++count; + } + } + return count; +} + +template +inline size_t l0_pseudo_norm(Container const & v) +{ + return l0_pseudo_norm(v.cbegin(), v.cend()); +} + +template +auto lp_norm(ForwardIterator first, ForwardIterator last, typename std::remove_const())>::type>::type p) +{ + using std::pow; + using std::is_floating_point; + using std::isfinite; + using RealOrComplex = typename std::iterator_traits::value_type; + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + BOOST_ASSERT_MSG(p.real() >= 0, "For p < 0, the lp norm is not a norm."); + BOOST_ASSERT_MSG(p.imag() == 0, "For imaginary p, the lp norm is not a norm."); + using std::norm; + decltype(p.real()) lp = 0; + for (auto it = first; it != last; ++it) + { + lp += pow(norm(*it), p.real()/2); + } + + auto result = pow(lp, 1/p.real()); + if (!isfinite(result)) + { + auto a = boost::math::tools::sup_norm(first, last); + decltype(p.real()) lp = 0; + for (auto it = first; it != last; ++it) + { + lp += pow(abs(*it)/a, p.real()); + } + result = a*pow(lp, 1/p.real()); + } + return result; + } + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + BOOST_ASSERT_MSG(p >= 0, "For p < 0, the lp norm is not a norm"); + RealOrComplex lp = 0; + + for (auto it = first; it != last; ++it) + { + lp += pow(abs(*it), p); + } + + RealOrComplex result = pow(lp, 1/p); + if (!isfinite(result)) + { + RealOrComplex a = boost::math::tools::sup_norm(first, last); + lp = 0; + for (auto it = first; it != last; ++it) + { + lp += pow(abs(*it)/a, p); + } + result = a*pow(lp, 1/p); + } + return result; + } + else + { + BOOST_ASSERT_MSG(false, "Unable to determine if the input type is real or complex."); + } +} + +template +inline auto lp_norm(Container const & v, typename Container::value_type p) +{ + return lp_norm(v.cbegin(), v.cend(), p); +} + +}}} +#endif diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 74a2a85a07..a8fa366ac4 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -902,7 +902,8 @@ test-suite misc : [ run test_constant_generate.cpp : : : release USE_CPP_FLOAT=1 off:no ] [ run test_cubic_b_spline.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions ] off msvc:/bigobj release ] [ run catmull_rom_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] # does not in fact require C++17 constexpr; requires C++17 std::size. 
- [ run vector_functionals_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] + [ run descriptive_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] + [ run norms_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] [ run test_real_concept.cpp ../../test/build//boost_unit_test_framework ] [ run test_remez.cpp pch ../../test/build//boost_unit_test_framework ] [ run test_roots.cpp pch ../../test/build//boost_unit_test_framework ] diff --git a/test/vector_functionals_test.cpp b/test/descriptive_statistics_test.cpp similarity index 60% rename from test/vector_functionals_test.cpp rename to test/descriptive_statistics_test.cpp index 187d4865a6..929ef40ed1 100644 --- a/test/vector_functionals_test.cpp +++ b/test/descriptive_statistics_test.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -39,7 +39,7 @@ void test_integer_mean() // Work with std::array? std::array w{1,2,3,4,5}; - mu = boost::math::tools::mean(v); + mu = boost::math::tools::mean(w); BOOST_TEST(abs(mu - 3) < tol); } @@ -261,161 +261,6 @@ void test_complex_absolute_median() } -template -void test_lp() -{ - Real tol = 50*std::numeric_limits::epsilon(); - - std::array u{1,0,0}; - Real l3 = boost::math::tools::lp_norm(u.begin(), u.end(), 3); - BOOST_TEST(abs(l3 - 1) < tol); - - u[0] = -8; - l3 = boost::math::tools::lp_norm(u.cbegin(), u.cend(), 3); - BOOST_TEST(abs(l3 - 8) < tol); - - std::vector v(500); - for (size_t i = 0; i < v.size(); ++i) { - v[i] = 7; - } - Real l8 = boost::math::tools::lp_norm(v, 8); - Real expected = 7*pow(v.size(), static_cast(1)/static_cast(8)); - BOOST_TEST(abs(l8 - expected) < tol*abs(expected)); - - // Does it work with ublas vectors? - // Does it handle the overflow of intermediates? - boost::numeric::ublas::vector w(4); - Real bignum = sqrt(std::numeric_limits::max())/256; - for (size_t i = 0; i < w.size(); ++i) - { - w[i] = bignum; - } - Real l20 = boost::math::tools::lp_norm(w.cbegin(), w.cend(), 4); - expected = bignum*pow(w.size(), static_cast(1)/static_cast(4)); - BOOST_TEST(abs(l20 - expected) < tol*expected); -} - - -template -void test_complex_lp() -{ - typedef typename Complex::value_type Real; - Real tol = std::numeric_limits::epsilon(); - std::vector v{{1,0}, {0,0}, {0,0}}; - Real l3 = boost::math::tools::lp_norm(v.cbegin(), v.cend(), 3); - BOOST_TEST(abs(l3 - 1) < tol); - - l3 = boost::math::tools::lp_norm(v, 3); - BOOST_TEST(abs(l3 - 1) < tol); - -} - -template -void test_integer_total_variation() -{ - std::vector v{1,1}; - Z tv = boost::math::tools::total_variation(v); - BOOST_TEST_EQ(tv,0); - - v[1] = 2; - tv = boost::math::tools::total_variation(v.begin(), v.end()); - BOOST_TEST_EQ(tv,1); - - v.resize(50); - for (size_t i = 0; i < v.size(); ++i) { - v[i] = i; - } - - tv = boost::math::tools::total_variation(v); - BOOST_TEST_EQ(tv, v.size() -1); - - for (size_t i = 0; i < v.size(); ++i) { - v[i] = i*i; - } - - tv = boost::math::tools::total_variation(v); - BOOST_TEST_EQ(tv, (v.size() -1)*(v.size()-1)); - - // Work with std::array? 
- std::array w{1,1}; - tv = boost::math::tools::total_variation(w); - BOOST_TEST_EQ(tv,0); -} - -template -void test_total_variation() -{ - Real tol = std::numeric_limits::epsilon(); - std::vector v{1,1}; - Real tv = boost::math::tools::total_variation(v.begin(), v.end()); - BOOST_TEST(tv >= 0 && abs(tv) < tol); - - tv = boost::math::tools::total_variation(v); - BOOST_TEST(tv >= 0 && abs(tv) < tol); - - v[1] = 2; - tv = boost::math::tools::total_variation(v.begin(), v.end()); - BOOST_TEST(abs(tv - 1) < tol); - - v.resize(50); - for (size_t i = 0; i < v.size(); ++i) { - v[i] = i; - } - - tv = boost::math::tools::total_variation(v.begin(), v.end()); - BOOST_TEST(abs(tv - (v.size() -1)) < tol); - - for (size_t i = 0; i < v.size(); ++i) { - v[i] = i*i; - } - - tv = boost::math::tools::total_variation(v.begin(), v.end()); - BOOST_TEST(abs(tv - (v.size() -1)*(v.size()-1)) < tol); -} - -template -void test_sup_norm() -{ - Real tol = std::numeric_limits::epsilon(); - std::vector v{-2,1,0}; - Real s = boost::math::tools::sup_norm(v.begin(), v.end()); - BOOST_TEST(abs(s - 2) < tol); - - s = boost::math::tools::sup_norm(v); - BOOST_TEST(abs(s - 2) < tol); - - // Work with std::array? - std::array w{-2,1,0}; - s = boost::math::tools::sup_norm(w); - BOOST_TEST(abs(s - 2) < tol); - -} - -template -void test_integer_sup_norm() -{ - std::vector v{-2,1,0}; - Z s = boost::math::tools::sup_norm(v.begin(), v.end()); - BOOST_TEST_EQ(s, 2); - - s = boost::math::tools::sup_norm(v); - BOOST_TEST_EQ(s,2); -} - -template -void test_complex_sup_norm() -{ - typedef typename Complex::value_type Real; - Real tol = std::numeric_limits::epsilon(); - std::vector w{{0,-8}, {1,1}, {3,2}}; - Real s = boost::math::tools::sup_norm(w.cbegin(), w.cend()); - BOOST_TEST(abs(s-8) < tol); - - s = boost::math::tools::sup_norm(w); - BOOST_TEST(abs(s-8) < tol); -} - - template void test_gini_coefficient() { @@ -543,115 +388,6 @@ void test_absolute_gini_coefficient() // The Gini index is invariant under "cloning": If w = v \oplus v, then G(w) = G(v). } -template -void test_l0_pseudo_norm() -{ - std::vector v{0,0,1}; - size_t count = boost::math::tools::l0_pseudo_norm(v.begin(), v.end()); - BOOST_TEST_EQ(count, 1); - - // Compiles with cbegin()/cend()? 
- count = boost::math::tools::l0_pseudo_norm(v.cbegin(), v.cend()); - BOOST_TEST_EQ(count, 1); - - count = boost::math::tools::l0_pseudo_norm(v); - BOOST_TEST_EQ(count, 1); - - std::array w{0,0,1}; - count = boost::math::tools::l0_pseudo_norm(w); - BOOST_TEST_EQ(count, 1); -} - -template -void test_complex_l0_pseudo_norm() -{ - std::vector v{{0,0}, {0,0}, {1,0}}; - size_t count = boost::math::tools::l0_pseudo_norm(v.begin(), v.end()); - BOOST_TEST_EQ(count, 1); - - count = boost::math::tools::l0_pseudo_norm(v); - BOOST_TEST_EQ(count, 1); - -} - -template -void test_l1_norm() -{ - Real tol = std::numeric_limits::epsilon(); - std::vector v{1,1,1}; - Real l1 = boost::math::tools::l1_norm(v.begin(), v.end()); - BOOST_TEST(abs(l1 - 3) < tol); - - l1 = boost::math::tools::l1_norm(v); - BOOST_TEST(abs(l1 - 3) < tol); - - std::array w{1,1,1}; - l1 = boost::math::tools::l1_norm(w); - BOOST_TEST(abs(l1 - 3) < tol); -} - -template -void test_integer_l1_norm() -{ - std::vector v{1,1,1}; - Z l1 = boost::math::tools::l1_norm(v.begin(), v.end()); - BOOST_TEST_EQ(l1, 3); -} - -template -void test_complex_l1_norm() -{ - typedef typename Complex::value_type Real; - Real tol = std::numeric_limits::epsilon(); - std::vector v{{1,0}, {0,1},{0,-1}}; - Real l1 = boost::math::tools::l1_norm(v.begin(), v.end()); - BOOST_TEST(abs(l1 - 3) < tol); - - l1 = boost::math::tools::l1_norm(v); - BOOST_TEST(abs(l1 - 3) < tol); - -} - -template -void test_l2_norm() -{ - using std::sqrt; - Real tol = std::numeric_limits::epsilon(); - std::vector v{1,1,1,1}; - Real l2 = boost::math::tools::l2_norm(v.begin(), v.end()); - BOOST_TEST(abs(l2 - 2) < tol); - - l2 = boost::math::tools::l2_norm(v); - BOOST_TEST(abs(l2 - 2) < tol); - - std::array w{1,1,1,1}; - l2 = boost::math::tools::l2_norm(w); - BOOST_TEST(abs(l2 - 2) < tol); - - Real bignum = 4*sqrt(std::numeric_limits::max()); - v[0] = bignum; - v[1] = 0; - v[2] = 0; - v[3] = 0; - l2 = boost::math::tools::l2_norm(v.begin(), v.end()); - BOOST_TEST(abs(l2 - bignum) < tol*l2); -} - -template -void test_complex_l2_norm() -{ - using std::sqrt; - typedef typename Complex::value_type Real; - Real tol = 100*std::numeric_limits::epsilon(); - std::vector v{{1,0}, {0,1},{0,-1}, {1,0}}; - Real l2 = boost::math::tools::l2_norm(v.begin(), v.end()); - BOOST_TEST(abs(l2 - 2) < tol); - - l2 = boost::math::tools::l2_norm(v); - BOOST_TEST(abs(l2 - 2) < tol); - -} - template void test_shannon_entropy() { @@ -709,68 +445,6 @@ int main() test_complex_absolute_median>(); test_complex_absolute_median(); - test_lp(); - test_lp(); - test_lp(); - test_lp(); - - test_complex_lp>(); - test_complex_lp>(); - test_complex_lp>(); - test_complex_lp(); - - test_sup_norm(); - test_sup_norm(); - test_sup_norm(); - test_sup_norm(); - - test_integer_sup_norm(); - - test_complex_sup_norm>(); - test_complex_sup_norm>(); - test_complex_sup_norm>(); - test_complex_sup_norm(); - - test_l0_pseudo_norm(); - test_l0_pseudo_norm(); - test_l0_pseudo_norm(); - test_l0_pseudo_norm(); - test_l0_pseudo_norm(); - - test_complex_l0_pseudo_norm>(); - test_complex_l0_pseudo_norm>(); - test_complex_l0_pseudo_norm>(); - test_complex_l0_pseudo_norm(); - - test_l1_norm(); - test_l1_norm(); - test_l1_norm(); - test_l1_norm(); - - test_integer_l1_norm(); - - test_complex_l2_norm>(); - test_complex_l2_norm>(); - test_complex_l2_norm>(); - test_complex_l2_norm(); - - test_l2_norm(); - test_l2_norm(); - test_l2_norm(); - test_l2_norm(); - - test_complex_l1_norm>(); - test_complex_l1_norm>(); - test_complex_l1_norm>(); - test_complex_l1_norm(); - 
- test_total_variation(); - test_total_variation(); - test_total_variation(); - test_total_variation(); - - test_integer_total_variation(); - test_gini_coefficient(); test_gini_coefficient(); test_gini_coefficient(); diff --git a/test/norms_test.cpp b/test/norms_test.cpp new file mode 100644 index 0000000000..470c86c721 --- /dev/null +++ b/test/norms_test.cpp @@ -0,0 +1,361 @@ +/* + * (C) Copyright Nick Thompson 2018. + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using boost::multiprecision::cpp_bin_float_50; +using boost::multiprecision::cpp_complex_50; + +/* + * Test checklist: + * 1) Does it work with multiprecision? + * 2) Does it work with .cbegin()/.cend() if the data is not altered? + * 3) Does it work with ublas and std::array? (Checking Eigen and Armadillo will make the CI system really unhappy.) + * 4) Does it work with std::forward_list if a forward iterator is all that is required? + * 5) Does it work with complex data if complex data is sensible? + */ + + +template +void test_lp() +{ + Real tol = 50*std::numeric_limits::epsilon(); + + std::array u{1,0,0}; + Real l3 = boost::math::tools::lp_norm(u.begin(), u.end(), 3); + BOOST_TEST(abs(l3 - 1) < tol); + + u[0] = -8; + l3 = boost::math::tools::lp_norm(u.cbegin(), u.cend(), 3); + BOOST_TEST(abs(l3 - 8) < tol); + + std::vector v(500); + for (size_t i = 0; i < v.size(); ++i) { + v[i] = 7; + } + Real l8 = boost::math::tools::lp_norm(v, 8); + Real expected = 7*pow(v.size(), static_cast(1)/static_cast(8)); + BOOST_TEST(abs(l8 - expected) < tol*abs(expected)); + + // Does it work with ublas vectors? + // Does it handle the overflow of intermediates? + boost::numeric::ublas::vector w(4); + Real bignum = sqrt(std::numeric_limits::max())/256; + for (size_t i = 0; i < w.size(); ++i) + { + w[i] = bignum; + } + Real l20 = boost::math::tools::lp_norm(w.cbegin(), w.cend(), 4); + expected = bignum*pow(w.size(), static_cast(1)/static_cast(4)); + BOOST_TEST(abs(l20 - expected) < tol*expected); +} + + +template +void test_complex_lp() +{ + typedef typename Complex::value_type Real; + Real tol = std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,0}, {0,0}}; + Real l3 = boost::math::tools::lp_norm(v.cbegin(), v.cend(), 3); + BOOST_TEST(abs(l3 - 1) < tol); + + l3 = boost::math::tools::lp_norm(v, 3); + BOOST_TEST(abs(l3 - 1) < tol); + +} + +template +void test_integer_total_variation() +{ + std::vector v{1,1}; + Z tv = boost::math::tools::total_variation(v); + BOOST_TEST_EQ(tv,0); + + v[1] = 2; + tv = boost::math::tools::total_variation(v.begin(), v.end()); + BOOST_TEST_EQ(tv,1); + + v.resize(50); + for (size_t i = 0; i < v.size(); ++i) { + v[i] = i; + } + + tv = boost::math::tools::total_variation(v); + BOOST_TEST_EQ(tv, v.size() -1); + + for (size_t i = 0; i < v.size(); ++i) { + v[i] = i*i; + } + + tv = boost::math::tools::total_variation(v); + BOOST_TEST_EQ(tv, (v.size() -1)*(v.size()-1)); + + // Work with std::array? 
+ std::array w{1,1}; + tv = boost::math::tools::total_variation(w); + BOOST_TEST_EQ(tv,0); +} + +template +void test_total_variation() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1}; + Real tv = boost::math::tools::total_variation(v.begin(), v.end()); + BOOST_TEST(tv >= 0 && abs(tv) < tol); + + tv = boost::math::tools::total_variation(v); + BOOST_TEST(tv >= 0 && abs(tv) < tol); + + v[1] = 2; + tv = boost::math::tools::total_variation(v.begin(), v.end()); + BOOST_TEST(abs(tv - 1) < tol); + + v.resize(50); + for (size_t i = 0; i < v.size(); ++i) { + v[i] = i; + } + + tv = boost::math::tools::total_variation(v.begin(), v.end()); + BOOST_TEST(abs(tv - (v.size() -1)) < tol); + + for (size_t i = 0; i < v.size(); ++i) { + v[i] = i*i; + } + + tv = boost::math::tools::total_variation(v.begin(), v.end()); + BOOST_TEST(abs(tv - (v.size() -1)*(v.size()-1)) < tol); +} + +template +void test_sup_norm() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{-2,1,0}; + Real s = boost::math::tools::sup_norm(v.begin(), v.end()); + BOOST_TEST(abs(s - 2) < tol); + + s = boost::math::tools::sup_norm(v); + BOOST_TEST(abs(s - 2) < tol); + + // Work with std::array? + std::array w{-2,1,0}; + s = boost::math::tools::sup_norm(w); + BOOST_TEST(abs(s - 2) < tol); + +} + +template +void test_integer_sup_norm() +{ + std::vector v{-2,1,0}; + Z s = boost::math::tools::sup_norm(v.begin(), v.end()); + BOOST_TEST_EQ(s, 2); + + s = boost::math::tools::sup_norm(v); + BOOST_TEST_EQ(s,2); +} + +template +void test_complex_sup_norm() +{ + typedef typename Complex::value_type Real; + Real tol = std::numeric_limits::epsilon(); + std::vector w{{0,-8}, {1,1}, {3,2}}; + Real s = boost::math::tools::sup_norm(w.cbegin(), w.cend()); + BOOST_TEST(abs(s-8) < tol); + + s = boost::math::tools::sup_norm(w); + BOOST_TEST(abs(s-8) < tol); +} + +template +void test_l0_pseudo_norm() +{ + std::vector v{0,0,1}; + size_t count = boost::math::tools::l0_pseudo_norm(v.begin(), v.end()); + BOOST_TEST_EQ(count, 1); + + // Compiles with cbegin()/cend()? 
+ count = boost::math::tools::l0_pseudo_norm(v.cbegin(), v.cend()); + BOOST_TEST_EQ(count, 1); + + count = boost::math::tools::l0_pseudo_norm(v); + BOOST_TEST_EQ(count, 1); + + std::array w{0,0,1}; + count = boost::math::tools::l0_pseudo_norm(w); + BOOST_TEST_EQ(count, 1); +} + +template +void test_complex_l0_pseudo_norm() +{ + std::vector v{{0,0}, {0,0}, {1,0}}; + size_t count = boost::math::tools::l0_pseudo_norm(v.begin(), v.end()); + BOOST_TEST_EQ(count, 1); + + count = boost::math::tools::l0_pseudo_norm(v); + BOOST_TEST_EQ(count, 1); + +} + +template +void test_l1_norm() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + Real l1 = boost::math::tools::l1_norm(v.begin(), v.end()); + BOOST_TEST(abs(l1 - 3) < tol); + + l1 = boost::math::tools::l1_norm(v); + BOOST_TEST(abs(l1 - 3) < tol); + + std::array w{1,1,1}; + l1 = boost::math::tools::l1_norm(w); + BOOST_TEST(abs(l1 - 3) < tol); +} + +template +void test_integer_l1_norm() +{ + std::vector v{1,1,1}; + Z l1 = boost::math::tools::l1_norm(v.begin(), v.end()); + BOOST_TEST_EQ(l1, 3); +} + +template +void test_complex_l1_norm() +{ + typedef typename Complex::value_type Real; + Real tol = std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,1},{0,-1}}; + Real l1 = boost::math::tools::l1_norm(v.begin(), v.end()); + BOOST_TEST(abs(l1 - 3) < tol); + + l1 = boost::math::tools::l1_norm(v); + BOOST_TEST(abs(l1 - 3) < tol); + +} + +template +void test_l2_norm() +{ + using std::sqrt; + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1}; + Real l2 = boost::math::tools::l2_norm(v.begin(), v.end()); + BOOST_TEST(abs(l2 - 2) < tol); + + l2 = boost::math::tools::l2_norm(v); + BOOST_TEST(abs(l2 - 2) < tol); + + std::array w{1,1,1,1}; + l2 = boost::math::tools::l2_norm(w); + BOOST_TEST(abs(l2 - 2) < tol); + + Real bignum = 4*sqrt(std::numeric_limits::max()); + v[0] = bignum; + v[1] = 0; + v[2] = 0; + v[3] = 0; + l2 = boost::math::tools::l2_norm(v.begin(), v.end()); + BOOST_TEST(abs(l2 - bignum) < tol*l2); +} + +template +void test_complex_l2_norm() +{ + using std::sqrt; + typedef typename Complex::value_type Real; + Real tol = 100*std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,1},{0,-1}, {1,0}}; + Real l2 = boost::math::tools::l2_norm(v.begin(), v.end()); + BOOST_TEST(abs(l2 - 2) < tol); + + l2 = boost::math::tools::l2_norm(v); + BOOST_TEST(abs(l2 - 2) < tol); + +} + +int main() +{ + test_lp(); + test_lp(); + test_lp(); + test_lp(); + + test_complex_lp>(); + test_complex_lp>(); + test_complex_lp>(); + test_complex_lp(); + + test_sup_norm(); + test_sup_norm(); + test_sup_norm(); + test_sup_norm(); + + test_integer_sup_norm(); + + test_complex_sup_norm>(); + test_complex_sup_norm>(); + test_complex_sup_norm>(); + test_complex_sup_norm(); + + test_l0_pseudo_norm(); + test_l0_pseudo_norm(); + test_l0_pseudo_norm(); + test_l0_pseudo_norm(); + test_l0_pseudo_norm(); + + test_complex_l0_pseudo_norm>(); + test_complex_l0_pseudo_norm>(); + test_complex_l0_pseudo_norm>(); + test_complex_l0_pseudo_norm(); + + test_l1_norm(); + test_l1_norm(); + test_l1_norm(); + test_l1_norm(); + + test_integer_l1_norm(); + + test_complex_l2_norm>(); + test_complex_l2_norm>(); + test_complex_l2_norm>(); + test_complex_l2_norm(); + + test_l2_norm(); + test_l2_norm(); + test_l2_norm(); + test_l2_norm(); + + test_complex_l1_norm>(); + test_complex_l1_norm>(); + test_complex_l1_norm>(); + test_complex_l1_norm(); + + test_total_variation(); + test_total_variation(); + test_total_variation(); + test_total_variation(); + + 
test_integer_total_variation(); + + return boost::report_errors(); +} From 94ceca1e43bb7d055654b0f5ecca9e8a6421af24 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Mon, 10 Dec 2018 12:05:05 -0700 Subject: [PATCH 10/46] Split descriptive_statistics.hpp into univariate_statistics.hpp and a currently-hypothetical bivariate_statistics.hpp [CI SKIP] --- doc/math.qbk | 3 +- doc/vector_functionals/norms.qbk | 7 +- doc/vector_functionals/signal_statistics.qbk | 144 ++++++++++ ...atistics.qbk => univariate_statistics.qbk} | 115 +------- include/boost/math/tools/norms.hpp | 32 ++- ...e_statistics.hpp => signal_statistics.hpp} | 148 +---------- .../math/tools/univariate_statistics.hpp | 158 +++++++++++ test/Jamfile.v2 | 3 +- test/norms_test.cpp | 17 +- ...cs_test.cpp => signal_statistics_test.cpp} | 215 +-------------- test/univariate_statistics_test.cpp | 248 ++++++++++++++++++ 11 files changed, 604 insertions(+), 486 deletions(-) create mode 100644 doc/vector_functionals/signal_statistics.qbk rename doc/vector_functionals/{descriptive_statistics.qbk => univariate_statistics.qbk} (54%) rename include/boost/math/tools/{descriptive_statistics.hpp => signal_statistics.hpp} (55%) create mode 100644 include/boost/math/tools/univariate_statistics.hpp rename test/{descriptive_statistics_test.cpp => signal_statistics_test.cpp} (55%) create mode 100644 test/univariate_statistics_test.cpp diff --git a/doc/math.qbk b/doc/math.qbk index c5f30c6f35..0903a6d97a 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -553,7 +553,8 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22. [endmathpart] [/section:dist Statistical Distributions and Functions] [mathpart vector_functionals Vector Functionals] -[include vector_functionals/descriptive_statistics.qbk] +[include vector_functionals/univariate_statistics.qbk] +[include vector_functionals/signal_statistics.qbk] [include vector_functionals/norms.qbk] [endmathpart] [/section:vector_functionals Vector Functionals] diff --git a/doc/vector_functionals/norms.qbk b/doc/vector_functionals/norms.qbk index 221980c545..553ea56763 100644 --- a/doc/vector_functionals/norms.qbk +++ b/doc/vector_functionals/norms.qbk @@ -122,7 +122,7 @@ The \u2113[super 2] norm is again a special case of the \u2113[super /p/] norm, double l1 = boost::math::tools::l2_norm(v.begin(), v.end()); // l1 = sqrt(3) -Requires a forward iterable input, does not modify input data, and works with complex numbers. +Requires a forward iterable input, does not modify input data, and works with real and complex numbers. [heading Total Variation] @@ -135,11 +135,14 @@ Requires a forward iterable input, does not modify input data, and works with co std::vector v{1,1,1}; int tv = boost::math::tools::total_variation(v); -The total variation only supports real numbers and /signed/ integers. +The total variation only supports real numbers and integers. All the constituent operations to compute the total variation are well-defined for complex numbers, but the computed result is not meaningful; a 2D total variation is more appropriate. The container must be forward iterable, and the contents are not modified. +As an aside, the total variation is not technically a norm, since /TV(v) = 0/ does not imply /v = 0/. +However, it satisfies the triangle inequality and is absolutely 1-homogeneous, so it is a seminorm, and hence is grouped with the other norms here. + [heading References] * Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002. 
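The documentation change above broadens total variation from signed integers to all integer types; the matching change to norms.hpp later in this patch takes the differences in sorted order for unsigned types, because subtracting a larger unsigned value wraps around. A small sketch of the pitfall and the fix (the helper names tv_naive and tv_ordered are illustrative only):

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Wrong for unsigned types: 1u - 2u wraps to a huge value.
    std::uint64_t tv_naive(const std::vector<std::uint64_t>& v)
    {
        std::uint64_t tv = 0;
        for (std::size_t i = 1; i < v.size(); ++i) { tv += v[i] - v[i - 1]; }
        return tv;
    }

    // Order the subtraction instead, as the patched total_variation does for unsigned inputs.
    std::uint64_t tv_ordered(const std::vector<std::uint64_t>& v)
    {
        std::uint64_t tv = 0;
        for (std::size_t i = 1; i < v.size(); ++i)
        {
            tv += v[i] > v[i - 1] ? v[i] - v[i - 1] : v[i - 1] - v[i];
        }
        return tv;
    }

    int main()
    {
        std::vector<std::uint64_t> v{2, 1, 2};
        std::cout << tv_naive(v) << "\n";   // wraps modulo 2^64; prints 0, not 2
        std::cout << tv_ordered(v) << "\n"; // prints 2
    }
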
diff --git a/doc/vector_functionals/signal_statistics.qbk b/doc/vector_functionals/signal_statistics.qbk new file mode 100644 index 0000000000..3e1bae7d00 --- /dev/null +++ b/doc/vector_functionals/signal_statistics.qbk @@ -0,0 +1,144 @@ +[/ + Copyright 2018 Nick Thompson + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt). +] + +[section:signal_statistics Signal Statistics] + +[heading Synopsis] + +`` +#include + +namespace boost{ namespace math{ namespace tools { + + template + auto absolute_median(Container & c); + + template + auto absolute_median(ForwardIterator first, ForwardIterator last); + + template + auto absolute_gini_coefficient(Container & c); + + template + auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last); + + template + auto hoyer_sparsity(Container const & c); + + template + auto hoyer_sparsity(ForwardIterator first, ForwardIterator last); + + template + auto shannon_entropy(Container const & c); + + template + auto shannon_entropy(ForwardIterator first, ForwardIterator last); + + template + auto shannon_cost(Container const & c); + + template + auto shannon_cost(ForwardIterator first, ForwardIterator last); + + +}}} +`` + +[heading Description] + +The file `boost/math/tools/signal_statistics.hpp` is a set of facilities for computing quantities commonly used in signal analysis. + +Our examples use `std::vector` to hold the data, but this not required. +In general, you can store your data in an Eigen array, and Armadillo vector, `std::array`, and for many of the routines, a `std::forward_list`. +These routines are usable in float, double, long double, and Boost.Multiprecision precision, as well as their complex extensions whenever the computation is well-defined. +For certain operations (total variation, for example) integer inputs are supported. + +[heading Absolute Median] + +The absolute median is used in signal processing, where the median of the magnitude of the coefficients in some expansion are used to estimate noise variance. +See [@https://wavelet-tour.github.io/ Mallat] for details. +The absolute median supports both real and complex arithmetic, modifies its input, and requires random access iterators. + + std::vector v{-1, 1}; + double m = boost::math::tools::absolute_median(v.begin(), v.end()); + // m = 1 + +[heading Absolute Gini Coefficient] + +The Gini coefficient, first used to measure wealth inequality, is also one of the best measures of the sparsity of an expansion in a basis. +A sparse expansion has most of its norm concentrated in just a few coefficients, making the connection with wealth inequality obvious. +However, for measuring sparsity, the phase of the numbers is irrelevant, so we provide the `absolute_gini_coefficient`: + + std::vector> v{{0,1}, {0,0}, {0,0}, {0,0}}; + double abs_gini = boost::math::tools::absolute_gini_coefficient(v.begin(), v.end()); + // now abs_gini = 1 + + std::vector> w{{0,1}, {1,0}, {0,-1}, {-1,0}}; + double abs_gini = boost::math::tools::absolute_gini_coefficient(w.begin(), w.end()); + // now abs_gini = 0 + + std::vector u{-1, 1, -1}; + double abs_gini = boost::math::tools::absolute_gini_coefficient(u.begin(), u.end()); + // now abs_gini = 0 + +Wikipedia calls our scaling a "sample Gini coefficient". 
+We chose this scaling because it always returns unity for a vector which has only one nonzero coefficient, +whereas the value of the population Gini coefficient of a vector with one non-zero element is dependent on the length of the input. + +If sorting the input data is too much expense for a sparsity measure (is it going to be perfect anyway?), +consider calculating the Hoyer sparsity instead. + +[heading Hoyer Sparsity] + +The Hoyer sparsity measures a normalized ratio of the \u2113[super 1] and \u2113[super 2] norms. +As the name suggests, it is used to measure sparsity in an expansion in some basis. + +The Hoyer sparsity computes ([radic]/N/ - \u2113[super 1](v)/\u2113[super 2](v))/([radic]N -1). +For details, see [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard]. + +Usage: + + std::vector v{1,0,0}; + Real hs = boost::math::tools::hoyer_sparsity(v); + // hs = 1 + std::vector v{1,-1,1}; + Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); + // hs = 0 + +The container must be forward iterable and the contents are not modified. +Accepts real, complex, and integer inputs. If the input is an integral type, the output is a double precision float. + +[heading Shannon Entropy] + + std::vector v{1/2.0, 1/2.0}; + double Hs = boost::math::tools::shannon_entropy(v.begin(), v.end()); + // Hs = ln(2). + +The Shannon entropy only supports non-negative real-valued inputs, presumably for interpretational purposes in the range [0,1]-though this is not enforced. +The natural logarithm is used to compute the Shannon entropy; all other "Shannon entropies" are readily obtained by change of log base. + +[heading Shannon Cost] + + std::vector v{-1, 1,-1}; + double Ks = boost::math::tools::shannon_cost(v.begin(), v.end()); + // Ks = 0; concentration of the vector is minimized. + +The Shannon cost is a modified version of the Shannon entropy used in signal processing and data compression. +The useful properties of the Shannon cost are /K/[sub /s/](0) = 0 and /K/[sub /s/](/v/\u2295/w/) = /K/[sub /s/](v) + /K/[sub /s/](w). +See [@https://doi.org/10.1007/978-3-642-56702-5 Ripples in Mathematics] for details. + + +[heading References] + +* Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002. +* Mallat, Stephane. ['A wavelet tour of signal processing: the sparse way.] Academic press, 2008. +* Hurley, Niall, and Scott Rickard. ['Comparing measures of sparsity.] IEEE Transactions on Information Theory 55.10 (2009): 4723-4741. +* Jensen, Arne, and Anders la Cour-Harbo. ['Ripples in mathematics: the discrete wavelet transform.] Springer Science & Business Media, 2001. + +[endsect] +[/section:signal_statistics Signal Statistics] diff --git a/doc/vector_functionals/descriptive_statistics.qbk b/doc/vector_functionals/univariate_statistics.qbk similarity index 54% rename from doc/vector_functionals/descriptive_statistics.qbk rename to doc/vector_functionals/univariate_statistics.qbk index 1cc307b3c9..5bb1e36934 100644 --- a/doc/vector_functionals/descriptive_statistics.qbk +++ b/doc/vector_functionals/univariate_statistics.qbk @@ -1,17 +1,17 @@ [/ - Copyright 2017 Nick Thompson + Copyright 2018 Nick Thompson Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt). 
] -[section:descriptive_statistics Descriptive Statistics] +[section:univariate_statistics Univariate Statistics] [heading Synopsis] `` -#include +#include namespace boost{ namespace math{ namespace tools { @@ -33,55 +33,24 @@ namespace boost{ namespace math{ namespace tools { template auto median(ForwardIterator first, ForwardIterator last); - template - auto absolute_median(Container & c); - - template - auto absolute_median(ForwardIterator first, ForwardIterator last); - template auto gini_coefficient(Container & c); template auto gini_coefficient(ForwardIterator first, ForwardIterator last); - template - auto absolute_gini_coefficient(Container & c); - - template - auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last); - - template - auto hoyer_sparsity(Container const & c); - - template - auto hoyer_sparsity(ForwardIterator first, ForwardIterator last); - - template - auto shannon_entropy(Container const & c); - - template - auto shannon_entropy(ForwardIterator first, ForwardIterator last); - - template - auto shannon_cost(Container const & c); - - template - auto shannon_cost(ForwardIterator first, ForwardIterator last); - - }}} `` [heading Description] -The file `boost/math/tools/descriptive_statistics.hpp` is a set of facilities for computing scalar values from vectors. +The file `boost/math/tools/univariate_statistics.hpp` is a set of facilities for computing scalar values from vectors. Many of these functionals have trivial naive implementations, but experienced programmers will recognize that even trivial algorithms are easy to screw up, and that numerical instabilities often lurk in corner cases. We have attempted to do our "due diligence" to root out these problems-scouring the literature for numerically stable algorithms for even the simplest of functionals. /Nota bene/: Some similar functionality is provided in [@https://www.boost.org/doc/libs/1_68_0/doc/html/accumulators/user_s_guide.html Boost Accumulators Framework]. -These accumulators should be used in real-time applications; `descriptive_statistics.hpp` should be used when CPU vectorization is needed. +These accumulators should be used in real-time applications; `univariate_statistics.hpp` should be used when CPU vectorization is needed. As a reminder, remember that to actually /get/ vectorization, compile with `-march=native -O3` flags. We now describe each functional in detail. @@ -131,15 +100,6 @@ Compute the median of a dataset: The calculation of the median is a thin wrapper around the C++11 [@https://en.cppreference.com/w/cpp/algorithm/nth_element `nth_element`]. Therefore, all requirements of `std::nth_element` are inherited by the median calculation. -[heading Absolute Median] - -The absolute median is used in signal processing, where the median of the magnitude of the coefficients in some expansion are used to estimate noise variance. -See [@https://wavelet-tour.github.io/ Mallat] for details. -The absolute median supports both real and complex arithmetic, modifies its input, and requires random access iterators. - - std::vector v{-1, 1}; - double m = boost::math::tools::absolute_median(v.begin(), v.end()); - // m = 1 [heading Gini Coefficient] @@ -163,69 +123,6 @@ If you wish to convert the Boost Gini coefficient to the population Gini coeffic However, a single use case (measuring wealth inequality when some people have negative wealth) exists, so we do not throw an exception when negative values are encountered. 
You should have /very/ good cause to pass negative values to the Gini coefficient calculator. -The Gini coefficient, first used to measure wealth inequality, is also one of the best measures of the sparsity of an expansion in a basis. -A sparse expansion has most of its norm concentrated in just a few coefficients, making the connection with wealth inequality obvious. -However, for measuring sparsity, the phase of the numbers is irrelevant, so `absolute_gini_coefficient` should be used instead: - - std::vector> v{{0,1}, {0,0}, {0,0}, {0,0}}; - double abs_gini = boost::math::tools::absolute_gini_coefficient(v.begin(), v.end()); - // now abs_gini = 1 - - std::vector> w{{0,1}, {1,0}, {0,-1}, {-1,0}}; - double abs_gini = boost::math::tools::absolute_gini_coefficient(w.begin(), w.end()); - // now abs_gini = 0 - - std::vector u{-1, 1, -1}; - double abs_gini = boost::math::tools::absolute_gini_coefficient(u.begin(), u.end()); - // now abs_gini = 0 - -Again, Wikipedia denotes our scaling as a "sample Gini coefficient". -We chose this scaling because it always returns unity for a vector which has only one nonzero coefficient, -whereas the value of the population Gini coefficient of a vector with one non-zero element is dependent on the length of the input. - -If sorting the input data is too much expense for a sparsity measure (is it going to be perfect anyway?), -consider calculating the Hoyer sparsity instead. - -[heading Hoyer Sparsity] - -The Hoyer sparsity measures a normalized ratio of the \u2113[super 1] and \u2113[super 2] norms. -As the name suggests, it is used to measure sparsity in an expansion in some basis. - -The Hoyer sparsity computes ([radic]/N/ - \u2113[super 1](v)/\u2113[super 2](v))/([radic]N -1). -For details, see [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard]. - -Usage: - - std::vector v{1,0,0}; - Real hs = boost::math::tools::hoyer_sparsity(v); - // hs = 1 - std::vector v{1,-1,1}; - Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); - // hs = 0 - -The container must be forward iterable and the contents are not modified. -Accepts real, complex, and integer inputs. If the input is an integral type, the output is a double precision float. - -[heading Shannon Entropy] - - std::vector v{1/2.0, 1/2.0}; - double Hs = boost::math::tools::shannon_entropy(v.begin(), v.end()); - // Hs = ln(2). - -The Shannon entropy only supports non-negative real-valued inputs, presumably for interpretational purposes in the range [0,1]-though this is not enforced. -The natural logarithm is used to compute the Shannon entropy; all other "Shannon entropies" are readily obtained by change of log base. - -[heading Shannon Cost] - - std::vector v{-1, 1,-1}; - double Ks = boost::math::tools::shannon_cost(v.begin(), v.end()); - // Ks = 0; concentration of the vector is minimized. - -The Shannon cost is a modified version of the Shannon entropy used in signal processing and data compression. -The useful properties of the Shannon cost are /K/[sub /s/](0) = 0 and /K/[sub /s/](/v/\u2295 /w/) = /K/[sub /s/](v) + /K/[sub /s/](w). -See [@https://doi.org/10.1007/978-3-642-56702-5 Ripples in Mathematics] for details. - - [heading References] * Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002. @@ -234,4 +131,4 @@ See [@https://doi.org/10.1007/978-3-642-56702-5 Ripples in Mathematics] for deta * Jensen, Arne, and Anders la Cour-Harbo. ['Ripples in mathematics: the discrete wavelet transform.] Springer Science & Business Media, 2001. 
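For reference, a hedged standalone sketch of the scaling described above (reconstructed from the documentation, not taken from the library source; the library's exact normalization may differ): sort the data, form the population Gini coefficient G = 2*sum(i*x_(i))/(n*sum(x_i)) - (n+1)/n, and multiply by n/(n-1) to obtain a "sample" Gini coefficient that returns 1 when only one entry is nonzero.

    #include <algorithm>
    #include <iostream>
    #include <vector>

    // Hedged sketch of one common "sample Gini" normalization.
    double sample_gini(std::vector<double> v)
    {
        std::sort(v.begin(), v.end());
        double num = 0, denom = 0, i = 1;
        for (double x : v)
        {
            num += x * i;   // sum of i * x_(i) over the sorted data
            denom += x;
            i += 1;
        }
        double n = static_cast<double>(v.size());
        double population_gini = 2 * num / (n * denom) - (n + 1) / n;
        return population_gini * n / (n - 1);
    }

    int main()
    {
        std::vector<double> one_holder{0, 0, 0, 1};
        std::cout << sample_gini(one_holder) << "\n"; // 1: maximal inequality

        std::vector<double> equal{2, 2, 2, 2};
        std::cout << sample_gini(equal) << "\n";      // 0: perfect equality
    }
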
[endsect] -[/section:descriptive_statistics Descriptive Statistics] +[/section:univariate_statistics Univariate Statistics] diff --git a/include/boost/math/tools/norms.hpp b/include/boost/math/tools/norms.hpp index 8563b15704..449d2eb530 100644 --- a/include/boost/math/tools/norms.hpp +++ b/include/boost/math/tools/norms.hpp @@ -12,10 +12,6 @@ #include #include -/* - * A set of tools for computing scalar quantities associated with lists of numbers. - */ - namespace boost{ namespace math{ namespace tools { @@ -29,12 +25,32 @@ auto total_variation(ForwardIterator first, ForwardIterator last) Real tv = 0; auto it = first; Real tmp = *it; - while (++it != last) + + if constexpr (std::is_unsigned::value) { - tv += abs(*it - tmp); - tmp = *it; + while (++it != last) + { + if (*it > tmp) + { + tv += *it - tmp; + } + else + { + tv += tmp - *it; + } + tmp = *it; + } + return tv; + } + else + { + while (++it != last) + { + tv += abs(*it - tmp); + tmp = *it; + } + return tv; } - return tv; } template diff --git a/include/boost/math/tools/descriptive_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp similarity index 55% rename from include/boost/math/tools/descriptive_statistics.hpp rename to include/boost/math/tools/signal_statistics.hpp index 4e625e5c59..221978fe18 100644 --- a/include/boost/math/tools/descriptive_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -3,8 +3,8 @@ // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef BOOST_MATH_TOOLS_DESCRIPTIVE_STATISTICS_HPP -#define BOOST_MATH_TOOLS_DESCRIPTIVE_STATISTICS_HPP +#ifndef BOOST_MATH_TOOLS_SIGNAL_STATISTICS_HPP +#define BOOST_MATH_TOOLS_SIGNAL_STATISTICS_HPP #include #include @@ -12,119 +12,9 @@ #include #include -/* - * A set of tools for computing scalar quantities associated with lists of numbers. 
- */ - namespace boost{ namespace math{ namespace tools { -template -auto -mean(ForwardIterator first, ForwardIterator last) -{ - using Real = typename std::iterator_traits::value_type; - BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean."); - if constexpr (std::is_integral::value) - { - double mu = 0; - double i = 1; - for(auto it = first; it != last; ++it) { - mu = mu + (*it - mu)/i; - i += 1; - } - return mu; - } - else - { - Real mu = 0; - Real i = 1; - for(auto it = first; it != last; ++it) { - mu = mu + (*it - mu)/i; - i += 1; - } - return mu; - } -} - -template -inline auto mean(Container const & v) -{ - return mean(v.cbegin(), v.cend()); -} - -template -auto -mean_and_population_variance(ForwardIterator first, ForwardIterator last) -{ - using Real = typename std::iterator_traits::value_type; - BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance."); - // Higham, Accuracy and Stability, equation 1.6a and 1.6b: - if constexpr (std::is_integral::value) - { - double M = *first; - double Q = 0; - double k = 2; - for (auto it = first + 1; it != last; ++it) - { - double tmp = *it - M; - Q = Q + ((k-1)*tmp*tmp)/k; - M = M + tmp/k; - k += 1; - } - return std::make_pair(M, Q/(k-1)); - } - else - { - Real M = *first; - Real Q = 0; - Real k = 2; - for (auto it = first + 1; it != last; ++it) - { - Real tmp = *it - M; - Q = Q + ((k-1)*tmp*tmp)/k; - M = M + tmp/k; - k += 1; - } - - return std::make_pair(M, Q/(k-1)); - } -} - -template -inline auto mean_and_population_variance(Container const & v) -{ - return mean_and_population_variance(v.cbegin(), v.cend()); -} - -template -auto median(RandomAccessIterator first, RandomAccessIterator last) -{ - size_t num_elems = std::distance(first, last); - BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined."); - if (num_elems & 1) - { - auto middle = first + (num_elems - 1)/2; - std::nth_element(first, middle, last); - return *middle; - } - else - { - auto middle = first + num_elems/2 - 1; - std::nth_element(first, middle, last); - std::nth_element(middle, middle+1, last); - return (*middle + *(middle+1))/2; - } -} - - -template -inline auto median(RandomAccessContainer & v) -{ - return median(v.begin(), v.end()); -} - - template auto absolute_median(RandomAccessIterator first, RandomAccessIterator last) { @@ -202,40 +92,6 @@ inline auto shannon_cost(Container const & v) } -template -auto gini_coefficient(ForwardIterator first, ForwardIterator last) -{ - using Real = typename std::iterator_traits::value_type; - BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); - - std::sort(first, last); - - Real i = 1; - Real num = 0; - Real denom = 0; - for (auto it = first; it != last; ++it) - { - num += *it*i; - denom += *it; - ++i; - } - - // If the l1 norm is zero, all elements are zero, so every element is the same. 
- if (denom == 0) - { - return Real(0); - } - - return ((2*num)/denom - i)/(i-2); -} - -template -inline auto gini_coefficient(RandomAccessContainer & v) -{ - return gini_coefficient(v.begin(), v.end()); -} - - template auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) { diff --git a/include/boost/math/tools/univariate_statistics.hpp b/include/boost/math/tools/univariate_statistics.hpp new file mode 100644 index 0000000000..8ec3fed843 --- /dev/null +++ b/include/boost/math/tools/univariate_statistics.hpp @@ -0,0 +1,158 @@ +// (C) Copyright Nick Thompson 2018. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_UNIVARIATE_STATISTICS_HPP +#define BOOST_MATH_TOOLS_UNIVARIATE_STATISTICS_HPP + +#include +#include +#include +#include +#include + + +namespace boost{ namespace math{ namespace tools { + +template +auto +mean(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean."); + if constexpr (std::is_integral::value) + { + double mu = 0; + double i = 1; + for(auto it = first; it != last; ++it) { + mu = mu + (*it - mu)/i; + i += 1; + } + return mu; + } + else + { + Real mu = 0; + Real i = 1; + for(auto it = first; it != last; ++it) { + mu = mu + (*it - mu)/i; + i += 1; + } + return mu; + } +} + +template +inline auto mean(Container const & v) +{ + return mean(v.cbegin(), v.cend()); +} + +template +auto +mean_and_population_variance(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance."); + // Higham, Accuracy and Stability, equation 1.6a and 1.6b: + if constexpr (std::is_integral::value) + { + double M = *first; + double Q = 0; + double k = 2; + for (auto it = first + 1; it != last; ++it) + { + double tmp = *it - M; + Q = Q + ((k-1)*tmp*tmp)/k; + M = M + tmp/k; + k += 1; + } + return std::make_pair(M, Q/(k-1)); + } + else + { + Real M = *first; + Real Q = 0; + Real k = 2; + for (auto it = first + 1; it != last; ++it) + { + Real tmp = *it - M; + Q = Q + ((k-1)*tmp*tmp)/k; + M = M + tmp/k; + k += 1; + } + + return std::make_pair(M, Q/(k-1)); + } +} + +template +inline auto mean_and_population_variance(Container const & v) +{ + return mean_and_population_variance(v.cbegin(), v.cend()); +} + +template +auto median(RandomAccessIterator first, RandomAccessIterator last) +{ + size_t num_elems = std::distance(first, last); + BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined."); + if (num_elems & 1) + { + auto middle = first + (num_elems - 1)/2; + std::nth_element(first, middle, last); + return *middle; + } + else + { + auto middle = first + num_elems/2 - 1; + std::nth_element(first, middle, last); + std::nth_element(middle, middle+1, last); + return (*middle + *(middle+1))/2; + } +} + + +template +inline auto median(RandomAccessContainer & v) +{ + return median(v.begin(), v.end()); +} + + +template +auto gini_coefficient(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); + + std::sort(first, last); + + Real i = 1; + 
Real num = 0; + Real denom = 0; + for (auto it = first; it != last; ++it) + { + num += *it*i; + denom += *it; + ++i; + } + + // If the l1 norm is zero, all elements are zero, so every element is the same. + if (denom == 0) + { + return Real(0); + } + + return ((2*num)/denom - i)/(i-2); +} + +template +inline auto gini_coefficient(RandomAccessContainer & v) +{ + return gini_coefficient(v.begin(), v.end()); +} + +}}} +#endif diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index a8fa366ac4..2bb6afd7cf 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -902,8 +902,9 @@ test-suite misc : [ run test_constant_generate.cpp : : : release USE_CPP_FLOAT=1 off:no ] [ run test_cubic_b_spline.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions ] off msvc:/bigobj release ] [ run catmull_rom_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] # does not in fact require C++17 constexpr; requires C++17 std::size. - [ run descriptive_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] + [ run univariate_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] [ run norms_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] + [ run signal_statistics_test.cpp : : : [ requires cxx17_if_constexpr ] ] [ run test_real_concept.cpp ../../test/build//boost_unit_test_framework ] [ run test_remez.cpp pch ../../test/build//boost_unit_test_framework ] [ run test_roots.cpp pch ../../test/build//boost_unit_test_framework ] diff --git a/test/norms_test.cpp b/test/norms_test.cpp index 470c86c721..af7a89d582 100644 --- a/test/norms_test.cpp +++ b/test/norms_test.cpp @@ -109,6 +109,12 @@ void test_integer_total_variation() std::array w{1,1}; tv = boost::math::tools::total_variation(w); BOOST_TEST_EQ(tv,0); + + // Work with both signed and unsigned integers? + std::array u{1, 2, 1, 2}; + tv = boost::math::tools::total_variation(u); + BOOST_TEST_EQ(tv, 3); + } template @@ -335,6 +341,11 @@ int main() test_integer_l1_norm(); + test_complex_l1_norm>(); + test_complex_l1_norm>(); + test_complex_l1_norm>(); + test_complex_l1_norm(); + test_complex_l2_norm>(); test_complex_l2_norm>(); test_complex_l2_norm>(); @@ -345,16 +356,12 @@ int main() test_l2_norm(); test_l2_norm(); - test_complex_l1_norm>(); - test_complex_l1_norm>(); - test_complex_l1_norm>(); - test_complex_l1_norm(); - test_total_variation(); test_total_variation(); test_total_variation(); test_total_variation(); + test_integer_total_variation(); test_integer_total_variation(); return boost::report_errors(); diff --git a/test/descriptive_statistics_test.cpp b/test/signal_statistics_test.cpp similarity index 55% rename from test/descriptive_statistics_test.cpp rename to test/signal_statistics_test.cpp index 929ef40ed1..c232fd4867 100644 --- a/test/descriptive_statistics_test.cpp +++ b/test/signal_statistics_test.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -29,162 +29,6 @@ using boost::multiprecision::cpp_complex_50; * 5) Does it work with complex data if complex data is sensible? */ -template -void test_integer_mean() -{ - double tol = std::numeric_limits::epsilon(); - std::vector v{1,2,3,4,5}; - double mu = boost::math::tools::mean(v); - BOOST_TEST(abs(mu - 3) < tol); - - // Work with std::array? 
- std::array w{1,2,3,4,5}; - mu = boost::math::tools::mean(w); - BOOST_TEST(abs(mu - 3) < tol); -} - -template -void test_mean() -{ - Real tol = std::numeric_limits::epsilon(); - std::vector v{1,2,3,4,5}; - Real mu = boost::math::tools::mean(v.begin(), v.end()); - BOOST_TEST(abs(mu - 3) < tol); - - // Does range call work? - mu = boost::math::tools::mean(v); - BOOST_TEST(abs(mu - 3) < tol); - - // Can we successfully average only part of the vector? - mu = boost::math::tools::mean(v.begin(), v.begin() + 3); - BOOST_TEST(abs(mu - 2) < tol); - - // Does it work when we const qualify? - mu = boost::math::tools::mean(v.cbegin(), v.cend()); - BOOST_TEST(abs(mu - 3) < tol); - - // Does it work for std::array? - std::array u{1,2,3,4,5,6,7}; - mu = boost::math::tools::mean(u.begin(), u.end()); - BOOST_TEST(abs(mu - 4) < tol); - - // Does it work for a forward iterator? - std::forward_list l{1,2,3,4,5,6,7}; - mu = boost::math::tools::mean(l.begin(), l.end()); - BOOST_TEST(abs(mu - 4) < tol); - - // Does it work with ublas vectors? - boost::numeric::ublas::vector w(7); - for (size_t i = 0; i < w.size(); ++i) - { - w[i] = i+1; - } - mu = boost::math::tools::mean(w.cbegin(), w.cend()); - BOOST_TEST(abs(mu - 4) < tol); - -} - -template -void test_complex_mean() -{ - typedef typename Complex::value_type Real; - Real tol = std::numeric_limits::epsilon(); - std::vector v{{0,1},{0,2},{0,3},{0,4},{0,5}}; - auto mu = boost::math::tools::mean(v.begin(), v.end()); - BOOST_TEST(abs(mu.imag() - 3) < tol); - BOOST_TEST(abs(mu.real()) < tol); - - // Does range work? - mu = boost::math::tools::mean(v); - BOOST_TEST(abs(mu.imag() - 3) < tol); - BOOST_TEST(abs(mu.real()) < tol); -} - -template -void test_mean_and_population_variance() -{ - Real tol = std::numeric_limits::epsilon(); - std::vector v{1,1,1,1,1,1}; - auto [mu, sigma_sq] = boost::math::tools::mean_and_population_variance(v.begin(), v.end()); - BOOST_TEST(abs(mu - 1) < tol); - BOOST_TEST(abs(sigma_sq) < tol); - - std::vector u{1}; - auto [mu1, sigma1_sq] = boost::math::tools::mean_and_population_variance(u.cbegin(), u.cend()); - BOOST_TEST(abs(mu1 - 1) < tol); - BOOST_TEST(abs(sigma1_sq) < tol); - - std::array w{0,1,0,1,0,1,0,1}; - auto [mu2, sigma2_sq] = boost::math::tools::mean_and_population_variance(w.begin(), w.end()); - BOOST_TEST(abs(mu2 - 1.0/2.0) < tol); - BOOST_TEST(abs(sigma2_sq - 1.0/4.0) < tol); - - auto [mu3, sigma3_sq] = boost::math::tools::mean_and_population_variance(w); - BOOST_TEST(abs(mu3 - 1.0/2.0) < tol); - BOOST_TEST(abs(sigma3_sq - 1.0/4.0) < tol); - -} - -template -void test_integer_mean_and_population_variance() -{ - double tol = std::numeric_limits::epsilon(); - std::vector v{1,1,1,1,1,1}; - auto [mu, sigma_sq] = boost::math::tools::mean_and_population_variance(v); - BOOST_TEST(abs(mu - 1) < tol); - BOOST_TEST(abs(sigma_sq) < tol); -} - -template -void test_median() -{ - std::mt19937 g(12); - std::vector v{1,2,3,4,5,6,7}; - - Real m = boost::math::tools::median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 4); - - std::shuffle(v.begin(), v.end(), g); - // Does range call work? 
- m = boost::math::tools::median(v); - BOOST_TEST_EQ(m, 4); - - v = {1,2,3,3,4,5}; - m = boost::math::tools::median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 3); - std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 3); - - v = {1}; - m = boost::math::tools::median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 1); - - v = {1,1}; - m = boost::math::tools::median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 1); - - v = {2,4}; - m = boost::math::tools::median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 3); - - v = {1,1,1}; - m = boost::math::tools::median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 1); - - v = {1,2,3}; - m = boost::math::tools::median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 2); - std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 2); - - // Does it work with std::array? - std::array w{1,2,3}; - m = boost::math::tools::median(w); - BOOST_TEST_EQ(m, 2); -} - template void test_absolute_median() { @@ -261,34 +105,6 @@ void test_complex_absolute_median() } -template -void test_gini_coefficient() -{ - Real tol = std::numeric_limits::epsilon(); - std::vector v{1,0,0}; - Real gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); - BOOST_TEST(abs(gini - 1) < tol); - - gini = boost::math::tools::gini_coefficient(v); - BOOST_TEST(abs(gini - 1) < tol); - - v[0] = 1; - v[1] = 1; - v[2] = 1; - gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); - BOOST_TEST(abs(gini) < tol); - - v[0] = 0; - v[1] = 0; - v[2] = 0; - gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); - BOOST_TEST(abs(gini) < tol); - - std::array w{0,0,0}; - gini = boost::math::tools::gini_coefficient(w); - BOOST_TEST(abs(gini) < tol); -} - template void test_hoyer_sparsity() { @@ -411,30 +227,6 @@ void test_shannon_entropy() int main() { - test_integer_mean(); - test_integer_mean(); - test_integer_mean(); - - test_mean(); - test_mean(); - test_mean(); - test_mean(); - - test_complex_mean>(); - test_complex_mean(); - - test_mean_and_population_variance(); - test_mean_and_population_variance(); - test_mean_and_population_variance(); - test_mean_and_population_variance(); - - test_integer_mean_and_population_variance(); - - test_median(); - test_median(); - test_median(); - test_median(); - test_absolute_median(); test_absolute_median(); test_absolute_median(); @@ -445,11 +237,6 @@ int main() test_complex_absolute_median>(); test_complex_absolute_median(); - test_gini_coefficient(); - test_gini_coefficient(); - test_gini_coefficient(); - test_gini_coefficient(); - test_absolute_gini_coefficient(); test_absolute_gini_coefficient(); test_absolute_gini_coefficient(); diff --git a/test/univariate_statistics_test.cpp b/test/univariate_statistics_test.cpp new file mode 100644 index 0000000000..955d1ad067 --- /dev/null +++ b/test/univariate_statistics_test.cpp @@ -0,0 +1,248 @@ +/* + * (C) Copyright Nick Thompson 2018. + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using boost::multiprecision::cpp_bin_float_50; +using boost::multiprecision::cpp_complex_50; + +/* + * Test checklist: + * 1) Does it work with multiprecision? + * 2) Does it work with .cbegin()/.cend() if the data is not altered? + * 3) Does it work with ublas and std::array? 
(Checking Eigen and Armadillo will make the CI system really unhappy.) + * 4) Does it work with std::forward_list if a forward iterator is all that is required? + * 5) Does it work with complex data if complex data is sensible? + */ + +template +void test_integer_mean() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,2,3,4,5}; + double mu = boost::math::tools::mean(v); + BOOST_TEST(abs(mu - 3) < tol); + + // Work with std::array? + std::array w{1,2,3,4,5}; + mu = boost::math::tools::mean(w); + BOOST_TEST(abs(mu - 3) < tol); +} + +template +void test_mean() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,2,3,4,5}; + Real mu = boost::math::tools::mean(v.begin(), v.end()); + BOOST_TEST(abs(mu - 3) < tol); + + // Does range call work? + mu = boost::math::tools::mean(v); + BOOST_TEST(abs(mu - 3) < tol); + + // Can we successfully average only part of the vector? + mu = boost::math::tools::mean(v.begin(), v.begin() + 3); + BOOST_TEST(abs(mu - 2) < tol); + + // Does it work when we const qualify? + mu = boost::math::tools::mean(v.cbegin(), v.cend()); + BOOST_TEST(abs(mu - 3) < tol); + + // Does it work for std::array? + std::array u{1,2,3,4,5,6,7}; + mu = boost::math::tools::mean(u.begin(), u.end()); + BOOST_TEST(abs(mu - 4) < tol); + + // Does it work for a forward iterator? + std::forward_list l{1,2,3,4,5,6,7}; + mu = boost::math::tools::mean(l.begin(), l.end()); + BOOST_TEST(abs(mu - 4) < tol); + + // Does it work with ublas vectors? + boost::numeric::ublas::vector w(7); + for (size_t i = 0; i < w.size(); ++i) + { + w[i] = i+1; + } + mu = boost::math::tools::mean(w.cbegin(), w.cend()); + BOOST_TEST(abs(mu - 4) < tol); + +} + +template +void test_complex_mean() +{ + typedef typename Complex::value_type Real; + Real tol = std::numeric_limits::epsilon(); + std::vector v{{0,1},{0,2},{0,3},{0,4},{0,5}}; + auto mu = boost::math::tools::mean(v.begin(), v.end()); + BOOST_TEST(abs(mu.imag() - 3) < tol); + BOOST_TEST(abs(mu.real()) < tol); + + // Does range work? + mu = boost::math::tools::mean(v); + BOOST_TEST(abs(mu.imag() - 3) < tol); + BOOST_TEST(abs(mu.real()) < tol); +} + +template +void test_mean_and_population_variance() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1,1,1}; + auto [mu, sigma_sq] = boost::math::tools::mean_and_population_variance(v.begin(), v.end()); + BOOST_TEST(abs(mu - 1) < tol); + BOOST_TEST(abs(sigma_sq) < tol); + + std::vector u{1}; + auto [mu1, sigma1_sq] = boost::math::tools::mean_and_population_variance(u.cbegin(), u.cend()); + BOOST_TEST(abs(mu1 - 1) < tol); + BOOST_TEST(abs(sigma1_sq) < tol); + + std::array w{0,1,0,1,0,1,0,1}; + auto [mu2, sigma2_sq] = boost::math::tools::mean_and_population_variance(w.begin(), w.end()); + BOOST_TEST(abs(mu2 - 1.0/2.0) < tol); + BOOST_TEST(abs(sigma2_sq - 1.0/4.0) < tol); + + auto [mu3, sigma3_sq] = boost::math::tools::mean_and_population_variance(w); + BOOST_TEST(abs(mu3 - 1.0/2.0) < tol); + BOOST_TEST(abs(sigma3_sq - 1.0/4.0) < tol); + +} + +template +void test_integer_mean_and_population_variance() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1,1,1}; + auto [mu, sigma_sq] = boost::math::tools::mean_and_population_variance(v); + BOOST_TEST(abs(mu - 1) < tol); + BOOST_TEST(abs(sigma_sq) < tol); +} + +template +void test_median() +{ + std::mt19937 g(12); + std::vector v{1,2,3,4,5,6,7}; + + Real m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 4); + + std::shuffle(v.begin(), v.end(), g); + // Does range call work? 
+ m = boost::math::tools::median(v); + BOOST_TEST_EQ(m, 4); + + v = {1,2,3,3,4,5}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 3); + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 3); + + v = {1}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); + + v = {1,1}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); + + v = {2,4}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 3); + + v = {1,1,1}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); + + v = {1,2,3}; + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 2); + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::median(v.begin(), v.end()); + BOOST_TEST_EQ(m, 2); + + // Does it work with std::array? + std::array w{1,2,3}; + m = boost::math::tools::median(w); + BOOST_TEST_EQ(m, 2); +} + +template +void test_gini_coefficient() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,0,0}; + Real gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini - 1) < tol); + + gini = boost::math::tools::gini_coefficient(v); + BOOST_TEST(abs(gini - 1) < tol); + + v[0] = 1; + v[1] = 1; + v[2] = 1; + gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); + + v[0] = 0; + v[1] = 0; + v[2] = 0; + gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); + + std::array w{0,0,0}; + gini = boost::math::tools::gini_coefficient(w); + BOOST_TEST(abs(gini) < tol); +} + +int main() +{ + test_integer_mean(); + test_integer_mean(); + test_integer_mean(); + + test_mean(); + test_mean(); + test_mean(); + test_mean(); + + test_complex_mean>(); + test_complex_mean(); + + test_mean_and_population_variance(); + test_mean_and_population_variance(); + test_mean_and_population_variance(); + test_mean_and_population_variance(); + + test_integer_mean_and_population_variance(); + + test_median(); + test_median(); + test_median(); + test_median(); + + test_gini_coefficient(); + test_gini_coefficient(); + test_gini_coefficient(); + test_gini_coefficient(); + + return boost::report_errors(); +} From fb2ae307d6fd12f9f4a67effefaf768b28529490 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Mon, 10 Dec 2018 14:38:42 -0700 Subject: [PATCH 11/46] Computation of covariance. [CI SKIP] --- doc/math.qbk | 1 + .../bivariate_statistics.qbk | 54 ++++++++ .../boost/math/tools/bivariate_statistics.hpp | 53 ++++++++ test/Jamfile.v2 | 1 + test/bivariate_statistics_test.cpp | 121 ++++++++++++++++++ 5 files changed, 230 insertions(+) create mode 100644 doc/vector_functionals/bivariate_statistics.qbk create mode 100644 include/boost/math/tools/bivariate_statistics.hpp create mode 100644 test/bivariate_statistics_test.cpp diff --git a/doc/math.qbk b/doc/math.qbk index 0903a6d97a..3d184ad8d3 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -554,6 +554,7 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22. 
[mathpart vector_functionals Vector Functionals] [include vector_functionals/univariate_statistics.qbk] +[include vector_functionals/bivariate_statistics.qbk] [include vector_functionals/signal_statistics.qbk] [include vector_functionals/norms.qbk] [endmathpart] [/section:vector_functionals Vector Functionals] diff --git a/doc/vector_functionals/bivariate_statistics.qbk b/doc/vector_functionals/bivariate_statistics.qbk new file mode 100644 index 0000000000..8cf4dcdc7d --- /dev/null +++ b/doc/vector_functionals/bivariate_statistics.qbk @@ -0,0 +1,54 @@ +[/ + Copyright 2018 Nick Thompson + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt). +] + +[section:bivariate_statistics Bivariate Statistics] + +[heading Synopsis] + +`` +#include + +namespace boost{ namespace math{ namespace tools { + + template + auto population_covariance(Container const & u, Container const & v); + + template + auto means_and_population_covariance(Container const & u, Container const & v); + +}}} +`` + +[heading Description] + +This file provides functions for computing bivariate statistics. + +[heading Population Covariance] + + std::vector u{1,2,3,4,5}; + std::vector v{1,2,3,4,5}; + double cov_uv = boost::math::tools::population_covariance(u, v); + +The implementation follows [@https://doi.org/10.1109/CLUSTR.2009.5289161 Bennet et al]. +The data is not modified and must be forward iterable. +Works with real-valued inputs and does not work with complex-valued inputs. + +The algorithm used herein simultaneously generates the mean values of the input data /u/ and /v/. +For certain applications, it might be useful to get them in a single pass. +As such, we provide `means_and_population_covariance`: + + std::vector u{1,2,3,4,5}; + std::vector v{1,2,3,4,5}; + auto [mu_u, mu_v, cov_uv] = boost::math::tools::means_and_population_covariance(u, v); + +[heading References] + +* Bennett, Janine, et al. ['Numerically stable, single-pass, parallel statistics algorithms.] Cluster Computing and Workshops, 2009. CLUSTER'09. IEEE International Conference on. IEEE, 2009. + +[endsect] +[/section:bivariate_statistics Bivariate Statistics] diff --git a/include/boost/math/tools/bivariate_statistics.hpp b/include/boost/math/tools/bivariate_statistics.hpp new file mode 100644 index 0000000000..4a2a3883ba --- /dev/null +++ b/include/boost/math/tools/bivariate_statistics.hpp @@ -0,0 +1,53 @@ +// (C) Copyright Nick Thompson 2018. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP +#define BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP + +#include +#include +#include +#include +#include + + +namespace boost{ namespace math{ namespace tools { + +template +auto +means_and_population_covariance(Container const & u, Container const & v) +{ + using Real = typename Container::value_type; + using std::size; + BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance."); + BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least one sample."); + + // See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al. 
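+    // Incremental update: after k samples have been consumed,
+    //   mu_u_k = mu_u_{k-1} + (u_k - mu_u_{k-1})/k,  and likewise for mu_v;
+    //   C_k    = C_{k-1} + ((k-1)/k)*(u_k - mu_u_{k-1})*(v_k - mu_v_{k-1}),
+    // and the population covariance returned is C_n/n.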
+ Real cov = 0; + Real mu_u = u[0]; + Real mu_v = v[0]; + + for(size_t i = 1; i < size(u); ++i) + { + Real u_tmp = (u[i] - mu_u)/(i+1); + Real v_tmp = v[i] - mu_v; + cov += i*u_tmp*v_tmp; + mu_u = mu_u + u_tmp; + mu_v = mu_v + v_tmp/(i+1); + } + + return std::make_tuple(mu_u, mu_v, cov/size(u)); +} + +template +auto +population_covariance(Container const & u, Container const & v) +{ + auto [mu_u, mu_v, cov] = boost::math::tools::means_and_population_covariance(u, v); + return cov; +} + +}}} +#endif diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 2bb6afd7cf..b15f009fb0 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -905,6 +905,7 @@ test-suite misc : [ run univariate_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] [ run norms_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] [ run signal_statistics_test.cpp : : : [ requires cxx17_if_constexpr ] ] + [ run bivariate_statistics_test.cpp : : : [ requires cxx17_if_constexpr ] ] [ run test_real_concept.cpp ../../test/build//boost_unit_test_framework ] [ run test_remez.cpp pch ../../test/build//boost_unit_test_framework ] [ run test_roots.cpp pch ../../test/build//boost_unit_test_framework ] diff --git a/test/bivariate_statistics_test.cpp b/test/bivariate_statistics_test.cpp new file mode 100644 index 0000000000..924cb68494 --- /dev/null +++ b/test/bivariate_statistics_test.cpp @@ -0,0 +1,121 @@ +/* + * (C) Copyright Nick Thompson 2018. + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using boost::multiprecision::cpp_bin_float_50; +using boost::multiprecision::cpp_complex_50; + +/* + * Test checklist: + * 1) Does it work with multiprecision? + * 2) Does it work with .cbegin()/.cend() if the data is not altered? + * 3) Does it work with ublas and std::array? (Checking Eigen and Armadillo will make the CI system really unhappy.) + * 4) Does it work with std::forward_list if a forward iterator is all that is required? + * 5) Does it work with complex data if complex data is sensible? 
+ */ + +using boost::math::tools::means_and_population_covariance; +using boost::math::tools::population_covariance; + +template +void test_covariance() +{ + std::cout << std::setprecision(std::numeric_limits::digits10+1); + Real tol = std::numeric_limits::epsilon(); + using std::abs; + + // Covariance of a single thing is zero: + std::array u1{8}; + std::array v1{17}; + auto [mu_u1, mu_v1, cov1] = means_and_population_covariance(u1, v1); + + BOOST_TEST(abs(cov1) < tol); + BOOST_TEST(abs(mu_u1 - 8) < tol); + BOOST_TEST(abs(mu_v1 - 17) < tol); + + + std::array u2{8, 4}; + std::array v2{3, 7}; + auto [mu_u2, mu_v2, cov2] = means_and_population_covariance(u2, v2); + + BOOST_TEST(abs(cov2+4) < tol); + BOOST_TEST(abs(mu_u2 - 6) < tol); + BOOST_TEST(abs(mu_v2 - 5) < tol); + + std::vector u3{1,2,3}; + std::vector v3{1,1,1}; + + auto [mu_u3, mu_v3, cov3] = means_and_population_covariance(u3,v3); + + // Since v is constant, covariance(u,v) = 0 against everything any u: + BOOST_TEST(abs(cov3) < tol); + BOOST_TEST(abs(mu_u3 - 2) < tol); + BOOST_TEST(abs(mu_v3 - 1) < tol); + // Make sure we pull the correct symbol out of means_and_populaton_covariance: + cov3 = population_covariance(u3, v3); + BOOST_TEST(abs(cov3) < tol); + + cov3 = population_covariance(v3, u3); + // Covariance is symmetric: cov(u,v) = cov(v,u) + BOOST_TEST(abs(cov3) < tol); + + // cov(u,u) = sigma(u)^2: + cov3 = population_covariance(u3, u3); + Real expected = Real(2)/Real(3); + + BOOST_TEST(abs(cov3 - expected) < tol); + + std::mt19937 gen(15); + // Can't template standard library on multiprecision, so use double and cast back: + std::uniform_real_distribution dis(-1.0, 1.0); + std::vector u(500); + std::vector v(500); + for(size_t i = 0; i < u.size(); ++i) { + u[i] = (Real) dis(gen); + v[i] = (Real) dis(gen); + } + + auto [mu_u, sigma_u_sq] = boost::math::tools::mean_and_population_variance(u); + auto [mu_v, sigma_v_sq] = boost::math::tools::mean_and_population_variance(v); + + auto [mu_u_, mu_v_, cov_uv] = means_and_population_covariance(u, v); + BOOST_TEST(abs(mu_u - mu_u_) < tol); + BOOST_TEST(abs(mu_v - mu_v_) < tol); + + // Cauchy-Schwartz inequality: + BOOST_TEST(cov_uv*cov_uv <= sigma_u_sq*sigma_v_sq); + // cov(X, X) = sigma(X)^2: + Real cov_uu = population_covariance(u, u); + BOOST_TEST(abs(cov_uu - sigma_u_sq) < tol); + Real cov_vv = population_covariance(v, v); + BOOST_TEST(abs(cov_vv - sigma_v_sq) < tol); + +} + +int main() +{ + test_covariance(); + test_covariance(); + test_covariance(); + test_covariance(); + + return boost::report_errors(); +} From b0b0a6bb59ff4ec88b9f164cecb658c72a95c804 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Tue, 11 Dec 2018 10:53:33 -0700 Subject: [PATCH 12/46] Add oracle_snr to signal_statistics [CI SKIP] --- doc/vector_functionals/signal_statistics.qbk | 39 ++++++++- .../boost/math/tools/signal_statistics.hpp | 86 +++++++++++++++++++ test/signal_statistics_test.cpp | 61 +++++++++++++ 3 files changed, 185 insertions(+), 1 deletion(-) diff --git a/doc/vector_functionals/signal_statistics.qbk b/doc/vector_functionals/signal_statistics.qbk index 3e1bae7d00..e6ae829eae 100644 --- a/doc/vector_functionals/signal_statistics.qbk +++ b/doc/vector_functionals/signal_statistics.qbk @@ -45,6 +45,11 @@ namespace boost{ namespace math{ namespace tools { template auto shannon_cost(ForwardIterator first, ForwardIterator last); + template + auto oracle_snr(Container const & signal, Container const & noise); + + template + auto oracle_snr_db(Container const & signal, Container const & noise); }}} `` 
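+
+As a quick numeric check of the definition (a sketch; the data is made up for illustration):
+
+    std::vector<double> s(100, 2.0); // ||s||^2 = 400
+    std::vector<double> w(100, 1.0); // ||w||^2 = 100
+    double snr = boost::math::tools::oracle_snr(s, w);       // snr = 4
+    double snr_db = boost::math::tools::oracle_snr_db(s, w); // 10*log10(4), about 6.02 dB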
@@ -129,9 +134,40 @@ The natural logarithm is used to compute the Shannon entropy; all other "Shannon // Ks = 0; concentration of the vector is minimized. The Shannon cost is a modified version of the Shannon entropy used in signal processing and data compression. -The useful properties of the Shannon cost are /K/[sub /s/](0) = 0 and /K/[sub /s/](/v/\u2295/w/) = /K/[sub /s/](v) + /K/[sub /s/](w). +The useful properties of the Shannon cost are /K/[sub /s/](0) = 0 and /K/[sub /s/](/v/\u2295 /w/) = /K/[sub /s/](v) + /K/[sub /s/](w). See [@https://doi.org/10.1007/978-3-642-56702-5 Ripples in Mathematics] for details. +[heading Oracle Signal-to-noise ratio] + +The function `oracle_snr` computes the ratio \u2016 /s/ \u2016[sub 2][super 2] / \u2016 /w/ \u2016[sub 2][super 2], where /s/ is signal and /w/ is noise. +The function `oracle_snr_db` computes 10`log`[sub 10](\u2016 /s/ \u2016[super 2] / \u2016 /w/ \u2016[super 2]). +In general, one does not know how to decompose a real signal /x/ into /s/ + /w/ and as such /s/ is regarded as oracle information. +Hence this function is mainly useful for unit testing other SNR measurements. + +Usage: + + std::vector signal(500, 3.2); + std::vector noise(500); + // fill 'noise' with Gaussian white noise... + double snr_db = boost::math::tools::oracle_snr_db(signal, noise); + double snr = boost::math::tools::oracle_snr(signal, noise); + +The call should return the same value as [@https://www.mathworks.com/help/signal/ref/snr.html Matlab's `snr`]. + +The input can be real, complex, or integral. +Integral inputs produce double precision floating point outputs. +The input data is not modified and must satisfy the requirements of a `RandomAccessContainer`. + +[heading /M/[sub 2]/M/[sub 4] SNR Estimation] + +Estimates the SNR of a noisy signal via the /M/[sub 2]/M/[sub 4] method. +See [@https://doi.org/10.1109/26.871393 Pauluzzi and N.C. Beaulieu] for details. + + +[heading SVR SNR Estimation] + +Estimates the SNR of a noisy signal /x/ via the SVR method. +See [@https://doi.org/10.1109/26.871393 Pauluzzi and N.C. Beaulieu] for details. [heading References] @@ -139,6 +175,7 @@ See [@https://doi.org/10.1007/978-3-642-56702-5 Ripples in Mathematics] for deta * Mallat, Stephane. ['A wavelet tour of signal processing: the sparse way.] Academic press, 2008. * Hurley, Niall, and Scott Rickard. ['Comparing measures of sparsity.] IEEE Transactions on Information Theory 55.10 (2009): 4723-4741. * Jensen, Arne, and Anders la Cour-Harbo. ['Ripples in mathematics: the discrete wavelet transform.] Springer Science & Business Media, 2001. +* D. R. Pauluzzi and N. C. Beaulieu, ['A comparison of SNR estimation techniques for the AWGN channel,] IEEE Trans. Communications, Vol. 48, No. 10, pp. 1681-1691, 2000. 
[endsect] [/section:signal_statistics Signal Statistics] diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index 221978fe18..b88974c5d3 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -166,5 +166,91 @@ inline auto hoyer_sparsity(Container const & v) } +template +auto oracle_snr(Container const & signal, Container const & noise) +{ + using Real = typename Container::value_type; + BOOST_ASSERT_MSG(signal.size() == noise.size(), "Signal and noise must be have the same number of elements."); + if constexpr (std::is_integral::value) + { + double numerator = 0; + double denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + numerator += signal[i]*signal[i]; + denominator += noise[i]*noise[i]; + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits::infinity(); + } + return numerator/denominator; + } + else if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + + { + using std::norm; + typename Real::value_type numerator = 0; + typename Real::value_type denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + numerator += norm(signal[i]); + denominator += norm(noise[i]); + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits::infinity(); + } + + return numerator/denominator; + } + else + { + Real numerator = 0; + Real denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + numerator += signal[i]*signal[i]; + denominator += noise[i]*noise[i]; + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits::infinity(); + } + + return numerator/denominator; + } +} + +// Follows the definition of SNR given in Mallat, A Wavelet Tour of Signal Processing, equation 11.16. +template +auto oracle_snr_db(Container const & signal, Container const & noise) +{ + using std::log10; + return 10*log10(oracle_snr(signal, noise)); +} + +// Of course since we have an oracle snr estimator, we should have an snr estimator not requiring oracle data. +// The M2M4 estimator is reputed to be quite good, as is the SVR measure. +// A good reference is: +// D. R. Pauluzzi and N. C. Beaulieu, "A comparison of SNR estimation techniques for the AWGN channel," IEEE Trans. Communications, Vol. 48, No. 10, pp. 1681-1691, 2000. +// A nice python implementation: +// https://github.com/gnuradio/gnuradio/blob/master/gr-digital/examples/snr_estimators.py +// However, we have not implemented kurtosis and kurtosis, which is required of the method. 
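+// For reference, the M2M4 method recovers the signal power S and noise power N from the moment system
+//   M2 = S + N,   M4 = ka*S^2 + 6*S*N + kw*N^2,
+// where ka and kw are the kurtoses of the signal and the noise respectively.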
+ + }}} #endif diff --git a/test/signal_statistics_test.cpp b/test/signal_statistics_test.cpp index c232fd4867..33bd0cd65a 100644 --- a/test/signal_statistics_test.cpp +++ b/test/signal_statistics_test.cpp @@ -224,6 +224,55 @@ void test_shannon_entropy() BOOST_TEST(abs(Hs - expected) < tol*expected); } +template +void test_oracle_snr() +{ + using std::abs; + Real tol = 100*std::numeric_limits::epsilon(); + size_t length = 100; + std::vector signal(length, 1); + std::vector noise(length, 0); + + noise[0] = 1; + Real snr = boost::math::tools::oracle_snr(signal, noise); + Real snr_db = boost::math::tools::oracle_snr_db(signal, noise); + BOOST_TEST(abs(snr - length) < tol); + BOOST_TEST(abs(snr_db - 10*log10(length)) < tol); +} + +template +void test_integer_oracle_snr() +{ + using std::abs; + double tol = std::numeric_limits::epsilon(); + size_t length = 100; + std::vector signal(length, 1); + std::vector noise(length, 0); + + noise[0] = 1; + double snr = boost::math::tools::oracle_snr(signal, noise); + double snr_db = boost::math::tools::oracle_snr_db(signal, noise); + BOOST_TEST(abs(snr - length) < tol); + BOOST_TEST(abs(snr_db - 10*log10(length)) < tol); +} + +template +void test_complex_oracle_snr() +{ + using Real = typename Complex::value_type; + using std::abs; + using std::log10; + Real tol = 100*std::numeric_limits::epsilon(); + size_t length = 100; + std::vector signal(length, {1,0}); + std::vector noise(length, {0,0}); + + noise[0] = {1,0}; + Real snr = boost::math::tools::oracle_snr(signal, noise); + Real snr_db = boost::math::tools::oracle_snr_db(signal, noise); + BOOST_TEST(abs(snr - length) < tol); + BOOST_TEST(abs(snr_db - 10*log10(length)) < tol); +} int main() { @@ -258,5 +307,17 @@ int main() test_complex_hoyer_sparsity>(); test_complex_hoyer_sparsity(); + test_oracle_snr(); + test_oracle_snr(); + test_oracle_snr(); + test_oracle_snr(); + + test_integer_oracle_snr(); + + test_complex_oracle_snr>(); + test_complex_oracle_snr>(); + test_complex_oracle_snr>(); + test_complex_oracle_snr(); + return boost::report_errors(); } From 87f4bc9b117eadbd29050b5bdf873021283655da Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Tue, 11 Dec 2018 14:01:28 -0700 Subject: [PATCH 13/46] Add skewness and kurtosis [CI SKIP] --- .../univariate_statistics.qbk | 38 ++++- .../math/tools/univariate_statistics.hpp | 140 ++++++++++++++++++ test/univariate_statistics_test.cpp | 93 ++++++++++++ 3 files changed, 268 insertions(+), 3 deletions(-) diff --git a/doc/vector_functionals/univariate_statistics.qbk b/doc/vector_functionals/univariate_statistics.qbk index 5bb1e36934..d4d923f5ec 100644 --- a/doc/vector_functionals/univariate_statistics.qbk +++ b/doc/vector_functionals/univariate_statistics.qbk @@ -33,6 +33,12 @@ namespace boost{ namespace math{ namespace tools { template auto median(ForwardIterator first, ForwardIterator last); + template + auto population_skewness(Container const & c); + + template + auto population_skewness(ForwardIterator first, ForwardIterator last); + template auto gini_coefficient(Container & c); @@ -100,6 +106,34 @@ Compute the median of a dataset: The calculation of the median is a thin wrapper around the C++11 [@https://en.cppreference.com/w/cpp/algorithm/nth_element `nth_element`]. Therefore, all requirements of `std::nth_element` are inherited by the median calculation. +[heading Skewness] + +Computes the skewness of a dataset: + + std::vector v{1,2,3,4,5}; + double skewness = boost::math::tools::population_skewness(v); + // skewness = 0. 
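+
+Integral data is also accepted; for instance (the values are borrowed from the unit tests):
+
+    std::vector<int> w{0, 0, 0, 0, 5};
+    double skew = boost::math::tools::population_skewness(w);
+    // mu = 1, sigma = 2, so skew = 3/2.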
+ +The input vector is not modified, works with integral and real data. +If the input data is integral, the output is a double precision float. + +For a dataset consisting of a constant value, we return zero as the skewness. + +The implementation follows [@https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf Pebay]. + +[heading Kurtosis] + +Computes the kurtosis of a dataset: + + std::vector v{1,2,3,4,5}; + double kurtosis = boost::math::tools::population_kurtosis(v); + // kurtosis = 17/10 + +The implementation follows [@https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf Pebay]. +The input data must be forward iterable and must consist of real or integral values. +If the input data is integral, the output is a double precision float. +Note that this is /not/ the excess kurtosis. +If you require the excess kurtosis, subtract 3 from the kurtosis. [heading Gini Coefficient] @@ -126,9 +160,7 @@ You should have /very/ good cause to pass negative values to the Gini coefficien [heading References] * Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002. -* Mallat, Stephane. ['A wavelet tour of signal processing: the sparse way.] Academic press, 2008. -* Hurley, Niall, and Scott Rickard. ['Comparing measures of sparsity.] IEEE Transactions on Information Theory 55.10 (2009): 4723-4741. -* Jensen, Arne, and Anders la Cour-Harbo. ['Ripples in mathematics: the discrete wavelet transform.] Springer Science & Business Media, 2001. +* Philippe P. Pébay: ["Formulas for Robust, One-Pass Parallel Computation of Covariances and Arbitrary-Order Statistical Moments.] Technical Report SAND2008-6212, Sandia National Laboratories, September 2008. [endsect] [/section:univariate_statistics Univariate Statistics] diff --git a/include/boost/math/tools/univariate_statistics.hpp b/include/boost/math/tools/univariate_statistics.hpp index 8ec3fed843..5e0570cd92 100644 --- a/include/boost/math/tools/univariate_statistics.hpp +++ b/include/boost/math/tools/univariate_statistics.hpp @@ -154,5 +154,145 @@ inline auto gini_coefficient(RandomAccessContainer & v) return gini_coefficient(v.begin(), v.end()); } +// Follows equation 1.5 of: +// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf +template +auto +population_skewness(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute skewness."); + if constexpr (std::is_integral::value) + { + double M1 = *first; + double M2 = 0; + double M3 = 0; + double n = 2; + for (auto it = first + 1; it != last; ++it) + { + double delta21 = *it - M1; + double tmp = delta21/n; + M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 = M2 + tmp*(n-1)*delta21; + M1 = M1 + tmp; + n += 1; + } + + double variance = M2/(n-1); + if (variance == 0) + { + // The limit is technically undefined, but the interpretation here is clear: + // A constant dataset has no skewness. 
+ return double(0); + } + double skewness = M3/((n-1)*variance*sqrt(variance)); + return skewness; + } + else + { + Real M1 = *first; + Real M2 = 0; + Real M3 = 0; + Real n = 2; + for (auto it = first + 1; it != last; ++it) + { + Real delta21 = *it - M1; + Real tmp = delta21/n; + M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 = M2 + tmp*(n-1)*delta21; + M1 = M1 + tmp; + n += 1; + } + + Real variance = M2/(n-1); + if (variance == 0) + { + // The limit is technically undefined, but the interpretation here is clear: + // A constant dataset has no skewness. + return Real(0); + } + Real skewness = M3/((n-1)*variance*sqrt(variance)); + return skewness; + } +} + +template +inline auto population_skewness(Container const & v) +{ + return population_skewness(v.cbegin(), v.cend()); +} + +// Follows equation 1.6 of: +// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf +template +auto +population_kurtosis(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute kurtosis."); + if constexpr (std::is_integral::value) + { + double M1 = *first; + double M2 = 0; + double M3 = 0; + double M4 = 0; + double n = 2; + for (auto it = first + 1; it != last; ++it) + { + double delta21 = *it - M1; + double tmp = delta21/n; + M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3); + M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 = M2 + tmp*(n-1)*delta21; + M1 = M1 + tmp; + n += 1; + } + + double variance = M2/(n-1); + if (variance == 0) + { + return double(0); + } + double kurtosis = M4/((n-1)*variance*variance); + return kurtosis; + } + else + { + Real M1 = *first; + Real M2 = 0; + Real M3 = 0; + Real M4 = 0; + Real n = 2; + for (auto it = first + 1; it != last; ++it) + { + Real delta21 = *it - M1; + Real tmp = delta21/n; + M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3); + M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 = M2 + tmp*(n-1)*delta21; + M1 = M1 + tmp; + n += 1; + } + + Real variance = M2/(n-1); + if (variance == 0) + { + // Again, the limit is technically undefined, but the interpretation here is clear: + // A constant dataset has no kurtosis. 
+ return Real(0); + } + Real kurtosis = M4/((n-1)*variance*variance); + return kurtosis; + } +} + +template +inline auto population_kurtosis(Container const & v) +{ + return population_kurtosis(v.cbegin(), v.cend()); +} + + + }}} #endif diff --git a/test/univariate_statistics_test.cpp b/test/univariate_statistics_test.cpp index 955d1ad067..ded487c998 100644 --- a/test/univariate_statistics_test.cpp +++ b/test/univariate_statistics_test.cpp @@ -213,6 +213,85 @@ void test_gini_coefficient() BOOST_TEST(abs(gini) < tol); } +template +void test_integer_skewness() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + double skew = boost::math::tools::population_skewness(v); + BOOST_TEST(abs(skew) < tol); + + // Dataset is symmetric about the mean: + v = {1,2,3,4,5}; + skew = boost::math::tools::population_skewness(v); + BOOST_TEST(abs(skew) < tol); + + v = {0,0,0,0,5}; + // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2 + skew = boost::math::tools::population_skewness(v); + BOOST_TEST(abs(skew - 3.0/2.0) < tol); + +} + +template +void test_skewness() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + Real skew = boost::math::tools::population_skewness(v); + BOOST_TEST(abs(skew) < tol); + + // Dataset is symmetric about the mean: + v = {1,2,3,4,5}; + skew = boost::math::tools::population_skewness(v); + BOOST_TEST(abs(skew) < tol); + + v = {0,0,0,0,5}; + // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2 + skew = boost::math::tools::population_skewness(v); + BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol); + +} + +template +void test_kurtosis() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + Real kurtosis = boost::math::tools::population_kurtosis(v); + BOOST_TEST(abs(kurtosis) < tol); + + v = {1,2,3,4,5}; + // mu =1, sigma^2 = 2, kurtosis = 17/10 + kurtosis = boost::math::tools::population_kurtosis(v); + BOOST_TEST(abs(kurtosis - Real(17)/Real(10)) < tol); + + v = {0,0,0,0,5}; + // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2, kurtosis = 13/4 + kurtosis = boost::math::tools::population_kurtosis(v); + BOOST_TEST(abs(kurtosis- Real(13)/Real(4)) < tol); +} + +template +void test_integer_kurtosis() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + double kurtosis = boost::math::tools::population_kurtosis(v); + BOOST_TEST(abs(kurtosis) < tol); + + v = {1,2,3,4,5}; + // mu =1, sigma^2 = 2, kurtosis = 17/10 + kurtosis = boost::math::tools::population_kurtosis(v); + BOOST_TEST(abs(kurtosis - 17.0/10.0) < tol); + + v = {0,0,0,0,5}; + // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2, kurtosis = 13/4 + kurtosis = boost::math::tools::population_kurtosis(v); + BOOST_TEST(abs(kurtosis- 13.0/4.0) < tol); +} + + int main() { test_integer_mean(); @@ -244,5 +323,19 @@ int main() test_gini_coefficient(); test_gini_coefficient(); + test_skewness(); + test_skewness(); + test_skewness(); + test_skewness(); + + test_integer_skewness(); + + test_kurtosis(); + test_kurtosis(); + test_kurtosis(); + test_kurtosis(); + + test_integer_kurtosis(); + return boost::report_errors(); } From d940760e0b7344093b3f687097acad23f7818ee2 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Wed, 12 Dec 2018 22:40:30 -0700 Subject: [PATCH 14/46] Implement the M2M4 SNR estimator. 
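
A note on the method: eliminating the noise power N from
M2 = S + N and M4 = ka*S^2 + 6*S*N + kw*N^2 yields the quadratic
(ka+kw-6)*S^2 + 2*M2*(3-kw)*S + kw*M2^2 - M4 = 0 solved below
(and symmetrically for N). With the default kurtoses ka = 1, kw = 3
(real Gaussian noise) this reduces to S = sqrt((3*M2^2 - M4)/2) and
N = M2 - S. When no real, non-negative solution exists, NaN is
returned.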
--- doc/vector_functionals/signal_statistics.qbk | 51 ++++- .../boost/math/tools/signal_statistics.hpp | 181 +++++++++++++++++- .../math/tools/univariate_statistics.hpp | 56 ++++++ test/signal_statistics_test.cpp | 38 ++++ 4 files changed, 318 insertions(+), 8 deletions(-) diff --git a/doc/vector_functionals/signal_statistics.qbk b/doc/vector_functionals/signal_statistics.qbk index e6ae829eae..8ef112a377 100644 --- a/doc/vector_functionals/signal_statistics.qbk +++ b/doc/vector_functionals/signal_statistics.qbk @@ -51,6 +51,12 @@ namespace boost{ namespace math{ namespace tools { template auto oracle_snr_db(Container const & signal, Container const & noise); + template + auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimate_noise_kurtosis=3); + + template + auto m2m4_snr_estimator_db(Container const & noisy_signal,typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimate_noise_kurtosis=3); + }}} `` @@ -161,13 +167,49 @@ The input data is not modified and must satisfy the requirements of a `RandomAcc [heading /M/[sub 2]/M/[sub 4] SNR Estimation] Estimates the SNR of a noisy signal via the /M/[sub 2]/M/[sub 4] method. -See [@https://doi.org/10.1109/26.871393 Pauluzzi and N.C. Beaulieu] for details. +See [@https://doi.org/10.1109/26.871393 Pauluzzi and N.C. Beaulieu] and [@https://doi.org/10.1109/ISIT.1994.394869 Matzner and Englberger] for details. + + std::vector noisy_signal(512); + // fill noisy_signal with data contaminated by Gaussian white noise: + double est_snr = boost::math::tools::m2m4_snr_estimator_db(noisy_signal); + +The /M/[sub 2]/M/[sub 4] SNR estimator is an "in-service" estimator, meaning that the estimate is made using the noisy, data-bearing signal, and does not require a background estimate. +This estimator has been found to be work best between roughly -3 and 15db, tending to overestimate the noise below -3db, and underestimate the noise above 15db. +See [@https://www.mdpi.com/2078-2489/8/3/75/pdf Xue et al] for details. +The /M/[sub 2]/M/[sub 4] SNR estimator, by default, assumes that the kurtosis of the signal is 1 and the kurtosis of the noise is 3, the latter corresponding to Gaussian noise. +These parameters, however, can be overridden: -[heading SVR SNR Estimation] + std::vector noisy_signal(512); + // fill noisy_signal with the data: + double signal_kurtosis = 1.5; + // Noise is assumed to follow Laplace distribution, which has kurtosis of 6: + double noise_kurtosis = 6; + double est_snr = boost::math::tools::m2m4_snr_estimator_db(noisy_signal, signal_kurtosis, noise_kurtosis); + +Now, technically the method is a "blind SNR estimator", meaning that the no /a-priori/ information about the signal is required to use the method. +However, the performance of the method is /vastly/ better if you can come up with a better estimate of the signal and noise kurtosis. +How can we do this? Suppose we know that the SNR is much greater than 1. +Then we can estimate the signal kurtosis simply by using the noisy signal kurtosis. +If the SNR is much less than one, this method breaks down as the noisy signal kurtosis will tend to the noise kurtosis-though in this limit we have an excellent estimator of the noise kurtosis! +In addition, if you have a model of what your signal should look like, you can precompute the signal kurtosis. +For example, sinusoids have a kurtosis of 1.5. 
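+
+A sketch of this strategy in the high-SNR regime (the contents of `x` are assumed to be given):
+
+    std::vector<double> x(1024);
+    // fill x with the noisy, data-bearing signal ...
+    // When the SNR is large, the noisy-signal kurtosis is a reasonable estimate of the signal kurtosis:
+    double est_signal_kurtosis = boost::math::tools::population_kurtosis(x);
+    double est_snr_db = boost::math::tools::m2m4_snr_estimator_db(x, est_signal_kurtosis);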
+See [@http://www.jcomputers.us/vol8/jcp0808-21.pdf here] for a study which uses estimates of this sort to improve the performance of the /M/[sub 2]/M/[sub 4] estimator. + + +/Nota bene/: The traditional definition of SNR is /not/ mean invariant. +By this we mean that if a constant is added to every sample of a signal, the SNR is changed. +For example, adding DC bias to a signal changes its SNR. +For most use cases, this is really not what you intend; for example a signal consisting of zeros plus Gaussian noise has an SNR of zero, +whereas a signal with a constant DC bias and random Gaussian noise might have a very large SNR. + +The /M/[sub 2]/M/[sub 4] SNR estimator is computed from mean-invariant quantities, +and hence it should really be compared to the mean-invariant SNR. + +/Nota bene/: This computation requires the solution of a system of quadratic equations involving the noise kurtosis, the signal kurtosis, and the second and fourth moments of the data. +There is no guarantee that a solution of this system exists for all value of these parameters, in fact nonexistence can easily be demonstrated for certain data. +If there is no solution to the system, then failure is communicated by returning NaNs. -Estimates the SNR of a noisy signal /x/ via the SVR method. -See [@https://doi.org/10.1109/26.871393 Pauluzzi and N.C. Beaulieu] for details. [heading References] @@ -177,5 +219,6 @@ See [@https://doi.org/10.1109/26.871393 Pauluzzi and N.C. Beaulieu] for details * Jensen, Arne, and Anders la Cour-Harbo. ['Ripples in mathematics: the discrete wavelet transform.] Springer Science & Business Media, 2001. * D. R. Pauluzzi and N. C. Beaulieu, ['A comparison of SNR estimation techniques for the AWGN channel,] IEEE Trans. Communications, Vol. 48, No. 10, pp. 1681-1691, 2000. + [endsect] [/section:signal_statistics Signal Statistics] diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index b88974c5d3..0556a9ef5d 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace boost{ namespace math{ namespace tools { @@ -235,6 +236,43 @@ auto oracle_snr(Container const & signal, Container const & noise) } } +template +auto mean_invariant_oracle_snr(Container const & signal, Container const & noise) +{ + using Real = typename Container::value_type; + BOOST_ASSERT_MSG(signal.size() == noise.size(), "Signal and noise must be have the same number of elements."); + + Real mean = boost::math::tools::mean(signal); + Real numerator = 0; + Real denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + Real tmp = signal[i] - mean; + numerator += tmp*tmp; + denominator += noise[i]*noise[i]; + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits::infinity(); + } + + return numerator/denominator; + +} + +// Follows the definition of SNR given in Mallat, A Wavelet Tour of Signal Processing, equation 11.16. +template +auto mean_invariant_oracle_snr_db(Container const & signal, Container const & noise) +{ + using std::log10; + return 10*log10(mean_invariant_oracle_snr(signal, noise)); +} + + // Follows the definition of SNR given in Mallat, A Wavelet Tour of Signal Processing, equation 11.16. 
template auto oracle_snr_db(Container const & signal, Container const & noise) @@ -243,14 +281,149 @@ auto oracle_snr_db(Container const & signal, Container const & noise) return 10*log10(oracle_snr(signal, noise)); } -// Of course since we have an oracle snr estimator, we should have an snr estimator not requiring oracle data. -// The M2M4 estimator is reputed to be quite good, as is the SVR measure. -// A good reference is: +// A good reference on the M2M4 estimator: // D. R. Pauluzzi and N. C. Beaulieu, "A comparison of SNR estimation techniques for the AWGN channel," IEEE Trans. Communications, Vol. 48, No. 10, pp. 1681-1691, 2000. // A nice python implementation: // https://github.com/gnuradio/gnuradio/blob/master/gr-digital/examples/snr_estimators.py -// However, we have not implemented kurtosis and kurtosis, which is required of the method. +template +auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3) +{ + BOOST_ASSERT_MSG(estimated_signal_kurtosis >= 0, "The estimated signal kurtosis must be >=0"); + BOOST_ASSERT_MSG(estimated_noise_kurtosis >= 0, "The estimated noise kurtosis must be >=0"); + using Real = typename Container::value_type; + using std::sqrt; + if constexpr (std::is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + // If we first eliminate N, we obtain the quadratic equation: + // (ka+kw-6)S^2 + 2M2(3-kw)S + kw*M2^2 - M4 = 0 =: a*S^2 + bs*N + cs = 0 + // If we first eliminate S, we obtain the quadratic equation: + // (ka+kw-6)N^2 + 2M2(3-ka)N + ka*M2^2 - M4 = 0 =: a*N^2 + bn*N + cn = 0 + // We see that if kw=3, we have a special case, and if ka+kw=6, we have a special case. + auto [M1, M2, M3, M4] = boost::math::tools::first_four_moments(noisy_signal); + // Change to notation in Pauluzzi, equation 41: + auto kw = estimated_noise_kurtosis; + auto ka = estimated_signal_kurtosis; + // A common case, since it's the default: + Real a = (ka+kw-6); + Real bs = 2*M2*(3-kw); + Real cs = kw*M2*M2 - M4; + Real bn = 2*M2*(3-ka); + Real cn = ka*M2*M2 - M4; + Real N, S; + if(kw == 3) + { + if (ka == 3) + { + // When ka = kw = 3, then either the system is inconsistent, or the system does not have a unique solution: + return std::numeric_limits::quiet_NaN(); + } + Real Ssq = -cs/a; + if (Ssq < 0) + { + Real radicand = bn*bn - 4*a*cn; + if (radicand < 0) + { + return std::numeric_limits::quiet_NaN(); + } + N = (-bn + sqrt(radicand))/(2*a); + if (N < 0) + { + N = (-bn - sqrt(radicand))/(2*a); + if (N < 0) + { + return std::numeric_limits::quiet_NaN(); + } + S = M2 - N; + if (S < 0) + { + return std::numeric_limits::quiet_NaN(); + } + return S/N; + } + + } + S = sqrt(Ssq); + N = M2 - S; + if (N < 0) + { + return std::numeric_limits::quiet_NaN(); + } + return S/N; + } + + // Maybe I should look for some very small distance from 6, but . . . + if (ka+kw == 6) + { + // In this case we don't need to solve a quadratic equation: + S = -cs/bs; + N = -cn/bn; + if (S/N < 0) + { + return std::numeric_limits::quiet_NaN(); + } + return S/N; + } + + // The special cases have been taken care of. + // Now we must resort to solving a full quadratic. + Real radicand = bs*bs - 4*a*cs; + if (radicand < 0) + { + // See if we have a solution for N: + radicand = bn*bn - 4*a*cn; + if (radicand < 0) + { + // Both S and N are complex: + return std::numeric_limits::quiet_NaN(); + } + // N is real. 
Can it be made positive? + N = (-bn + sqrt(radicand))/(2*a); + if (N < 0) + { + N = (-bn - sqrt(radicand))/(2*a); + if (N < 0) + { + return std::numeric_limits::quiet_NaN(); + } + } + S = M2 - N; + if (S < 0) + { + return std::numeric_limits::quiet_NaN(); + } + return S/N; + } + + S = (-bs + sqrt(radicand))/(2*a); + if (S < 0) + { + S = (-bs - sqrt(radicand))/(2*a); + if (S < 0) + { + return std::numeric_limits::quiet_NaN(); + } + } + N = M2 - S; + if (N < 0) + { + return std::numeric_limits::quiet_NaN(); + } + return S/N; + } + else + { + BOOST_ASSERT_MSG(false, "The M2M4 estimator has not been implemented for this type."); + } +} + +template +auto m2m4_snr_estimator_db(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3) +{ + using std::log10; + return 10*log10(m2m4_snr_estimator(noisy_signal, estimated_signal_kurtosis, estimated_noise_kurtosis)); +} }}} #endif diff --git a/include/boost/math/tools/univariate_statistics.hpp b/include/boost/math/tools/univariate_statistics.hpp index 5e0570cd92..ad9158d0cb 100644 --- a/include/boost/math/tools/univariate_statistics.hpp +++ b/include/boost/math/tools/univariate_statistics.hpp @@ -293,6 +293,62 @@ inline auto population_kurtosis(Container const & v) } +// Follows equation 1.5/1.6 of: +// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf +template +auto +first_four_moments(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the first four moments."); + if constexpr (std::is_integral::value) + { + double M1 = *first; + double M2 = 0; + double M3 = 0; + double M4 = 0; + double n = 2; + for (auto it = first + 1; it != last; ++it) + { + double delta21 = *it - M1; + double tmp = delta21/n; + M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3); + M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 = M2 + tmp*(n-1)*delta21; + M1 = M1 + tmp; + n += 1; + } + + return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1)); + } + else + { + Real M1 = *first; + Real M2 = 0; + Real M3 = 0; + Real M4 = 0; + Real n = 2; + for (auto it = first + 1; it != last; ++it) + { + Real delta21 = *it - M1; + Real tmp = delta21/n; + M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3); + M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 = M2 + tmp*(n-1)*delta21; + M1 = M1 + tmp; + n += 1; + } + + return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1)); + } +} + +template +inline auto first_four_moments(Container const & v) +{ + return first_four_moments(v.cbegin(), v.cend()); +} + }}} #endif diff --git a/test/signal_statistics_test.cpp b/test/signal_statistics_test.cpp index 33bd0cd65a..83ea617aa6 100644 --- a/test/signal_statistics_test.cpp +++ b/test/signal_statistics_test.cpp @@ -13,12 +13,14 @@ #include #include #include +#include #include #include #include using boost::multiprecision::cpp_bin_float_50; using boost::multiprecision::cpp_complex_50; +using boost::math::constants::two_pi; /* * Test checklist: @@ -274,6 +276,38 @@ void test_complex_oracle_snr() BOOST_TEST(abs(snr_db - 10*log10(length)) < tol); } +template +void test_m2m4_snr_estimator() +{ + std::vector signal(5000, 1); + std::vector noise(signal.size()); + std::vector x(signal.size()); + std::mt19937 gen(18); + std::normal_distribution dis{0, 1.0}; + + for (size_t i = 0; i < noise.size(); ++i) { + signal[i] = 
5*sin(100*6.28*i/noise.size()); + noise[i] = dis(gen); + x[i] = signal[i] + noise[i]; + } + + auto m2m4_db = boost::math::tools::m2m4_snr_estimator_db(x, 1.5); + auto oracle_snr_db = boost::math::tools::mean_invariant_oracle_snr_db(signal, noise); + BOOST_TEST(abs(m2m4_db - oracle_snr_db) < 0.2); + + std::uniform_real_distribution uni_dis{-1,1}; + for (size_t i = 0; i < noise.size(); ++i) + { + noise[i] = uni_dis(gen); + x[i] = signal[i] + noise[i]; + } + + // Kurtosis of continuous uniform distribution over [-1,1] is 1.8: + m2m4_db = boost::math::tools::m2m4_snr_estimator_db(x, 1.5, 1.8); + oracle_snr_db = boost::math::tools::mean_invariant_oracle_snr_db(signal, noise); + BOOST_TEST(abs(m2m4_db - oracle_snr_db) < 0.2); +} + int main() { test_absolute_median(); @@ -319,5 +353,9 @@ int main() test_complex_oracle_snr>(); test_complex_oracle_snr(); + test_m2m4_snr_estimator(); + test_m2m4_snr_estimator(); + test_m2m4_snr_estimator(); + return boost::report_errors(); } From dc521aeb11158590a2e9e6d2cb5d0677791c0df3 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Thu, 13 Dec 2018 11:45:52 -0700 Subject: [PATCH 15/46] Clarify a case where solution to M2M4 estimator system does not exist. Split off a degenerate case where the signal is constant. [CI SKIP] --- doc/vector_functionals/signal_statistics.qbk | 5 +++++ include/boost/math/tools/signal_statistics.hpp | 9 +++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/doc/vector_functionals/signal_statistics.qbk b/doc/vector_functionals/signal_statistics.qbk index 8ef112a377..15ae154abd 100644 --- a/doc/vector_functionals/signal_statistics.qbk +++ b/doc/vector_functionals/signal_statistics.qbk @@ -210,6 +210,11 @@ and hence it should really be compared to the mean-invariant SNR. There is no guarantee that a solution of this system exists for all value of these parameters, in fact nonexistence can easily be demonstrated for certain data. If there is no solution to the system, then failure is communicated by returning NaNs. +The author has not managed to fully characterize the conditions under which a real solution with /S > 0/ and /N >0/ exists. +However, a very intuitive example demonstrates why nonexistence can occur. +One case is where both the signal and noise kurtosis are assumed to be equal to three. +Then the method has no mechanism for distinguishing the signal from the noise, and the solution is non-unique. 
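In code, this failure is reported as a NaN return value, so callers should check for it; a minimal sketch (assuming <cmath> is included for `std::isnan`):

    double est_snr = boost::math::tools::m2m4_snr_estimator(noisy_signal);
    if (std::isnan(est_snr))
    {
        // The quadratic system has no solution with S > 0 and N > 0 for these moments
        // and kurtosis estimates; fall back to another estimator or report failure.
    }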
+ [heading References] diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index 0556a9ef5d..440983bd9f 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -289,8 +289,8 @@ auto oracle_snr_db(Container const & signal, Container const & noise) template auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3) { - BOOST_ASSERT_MSG(estimated_signal_kurtosis >= 0, "The estimated signal kurtosis must be >=0"); - BOOST_ASSERT_MSG(estimated_noise_kurtosis >= 0, "The estimated noise kurtosis must be >=0"); + BOOST_ASSERT_MSG(estimated_signal_kurtosis > 0, "The estimated signal kurtosis must be positive"); + BOOST_ASSERT_MSG(estimated_noise_kurtosis > 0, "The estimated noise kurtosis must be positive."); using Real = typename Container::value_type; using std::sqrt; if constexpr (std::is_floating_point::value || @@ -302,6 +302,11 @@ auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::val // (ka+kw-6)N^2 + 2M2(3-ka)N + ka*M2^2 - M4 = 0 =: a*N^2 + bn*N + cn = 0 // We see that if kw=3, we have a special case, and if ka+kw=6, we have a special case. auto [M1, M2, M3, M4] = boost::math::tools::first_four_moments(noisy_signal); + if (M4 == 0) + { + // The signal is constant. There is no noise: + return std::numeric_limits::infinity(); + } // Change to notation in Pauluzzi, equation 41: auto kw = estimated_noise_kurtosis; auto ka = estimated_signal_kurtosis; From 3c13c588b07bc52959658e1b3300f45197a80a52 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Fri, 14 Dec 2018 12:27:10 -0700 Subject: [PATCH 16/46] Pearson's correlation coefficient [CI SKIP] --- .../bivariate_statistics.qbk | 19 ++++++++ .../boost/math/tools/bivariate_statistics.hpp | 47 +++++++++++++++++++ test/bivariate_statistics_test.cpp | 46 ++++++++++++++++++ 3 files changed, 112 insertions(+) diff --git a/doc/vector_functionals/bivariate_statistics.qbk b/doc/vector_functionals/bivariate_statistics.qbk index 8cf4dcdc7d..6f53e96972 100644 --- a/doc/vector_functionals/bivariate_statistics.qbk +++ b/doc/vector_functionals/bivariate_statistics.qbk @@ -21,6 +21,9 @@ namespace boost{ namespace math{ namespace tools { template auto means_and_population_covariance(Container const & u, Container const & v); + template + auto correlation_coefficient(Container const & u, Container const & v); + }}} `` @@ -46,6 +49,22 @@ As such, we provide `means_and_population_covariance`: std::vector v{1,2,3,4,5}; auto [mu_u, mu_v, cov_uv] = boost::math::tools::means_and_population_covariance(u, v); +[heading Correlation Coefficient] + +Computes the [@https://en.wikipedia.org/wiki/Pearson_correlation_coefficient Pearson correlation coefficient] of two datasets /u/ and /v/: + + std::vector u{1,2,3,4,5}; + std::vector v{1,2,3,4,5}; + double rho_uv = boost::math::tools::correlation_coefficient(u, v); + // rho_uv = 1. + +The data must be forward iterable and cannot be complex. + +If one or both of the datasets is constant, the correlation coefficient is an indeterminant form (0/0) and definitions must be introduced to assign it a value. +We use the following: If both datasets are constant, then the correlation coefficient is 1. +If one dataset is constant, and the other is not, then the correlation coefficient is zero. + + [heading References] * Bennett, Janine, et al. 
['Numerically stable, single-pass, parallel statistics algorithms.] Cluster Computing and Workshops, 2009. CLUSTER'09. IEEE International Conference on. IEEE, 2009. diff --git a/include/boost/math/tools/bivariate_statistics.hpp b/include/boost/math/tools/bivariate_statistics.hpp index 4a2a3883ba..dae162b7f3 100644 --- a/include/boost/math/tools/bivariate_statistics.hpp +++ b/include/boost/math/tools/bivariate_statistics.hpp @@ -49,5 +49,52 @@ population_covariance(Container const & u, Container const & v) return cov; } +template +auto correlation_coefficient(Container const & u, Container const & v) +{ + using Real = typename Container::value_type; + using std::size; + BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance."); + BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least two samples."); + + Real cov = 0; + Real mu_u = u[0]; + Real mu_v = v[0]; + Real Qu = 0; + Real Qv = 0; + + for(size_t i = 1; i < size(u); ++i) + { + Real u_tmp = u[i] - mu_u; + Real v_tmp = v[i] - mu_v; + Qu = Qu + (i*u_tmp*u_tmp)/(i+1); + Qv = Qv + (i*v_tmp*v_tmp)/(i+1); + cov += i*u_tmp*v_tmp/(i+1); + mu_u = mu_u + u_tmp/(i+1); + mu_v = mu_v + v_tmp/(i+1); + } + + // If both datasets are constant, then they are perfectly correlated. + if (Qu == 0 && Qv == 0) + { + return Real(1); + } + // If one dataset is constant and the other isn't, then they have no correlation: + if (Qu == 0 || Qv == 0) + { + return Real(0); + } + + // Make sure rho in [-1, 1], even in the presence of numerical noise. + Real rho = cov/sqrt(Qu*Qv); + if (rho > 1) { + rho = 1; + } + if (rho < -1) { + rho = -1; + } + return rho; +} + }}} #endif diff --git a/test/bivariate_statistics_test.cpp b/test/bivariate_statistics_test.cpp index 924cb68494..5ba592d600 100644 --- a/test/bivariate_statistics_test.cpp +++ b/test/bivariate_statistics_test.cpp @@ -110,6 +110,47 @@ void test_covariance() } +template +void test_correlation_coefficient() +{ + using boost::math::tools::correlation_coefficient; + + Real tol = std::numeric_limits::epsilon(); + std::vector u{1}; + std::vector v{1}; + Real rho_uv = correlation_coefficient(u, v); + BOOST_TEST(abs(rho_uv - 1) < tol); + + u = {1,1}; + v = {1,1}; + rho_uv = correlation_coefficient(u, v); + BOOST_TEST(abs(rho_uv - 1) < tol); + + u = {1, 2, 3}; + v = {1, 2, 3}; + rho_uv = correlation_coefficient(u, v); + BOOST_TEST(abs(rho_uv - 1) < tol); + + u = {1, 2, 3}; + v = {-1, -2, -3}; + rho_uv = correlation_coefficient(u, v); + BOOST_TEST(abs(rho_uv + 1) < tol); + + rho_uv = correlation_coefficient(v, u); + BOOST_TEST(abs(rho_uv + 1) < tol); + + u = {1, 2, 3}; + v = {0, 0, 0}; + rho_uv = correlation_coefficient(v, u); + BOOST_TEST(abs(rho_uv) < tol); + + u = {1, 2, 3}; + v = {0, 0, 3}; + rho_uv = correlation_coefficient(v, u); + // mu_u = 2, sigma_u^2 = 2/3, mu_v = 1, sigma_v^2 = 2, cov(u,v) = 1. 
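    // Hence rho = cov/sqrt(sigma_u^2*sigma_v^2) = 1/sqrt((2/3)*2) = sqrt(3)/2, which is what the next assertion checks.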
+ BOOST_TEST(abs(rho_uv - sqrt(Real(3))/Real(2)) < tol); +} + int main() { test_covariance(); @@ -117,5 +158,10 @@ int main() test_covariance(); test_covariance(); + test_correlation_coefficient(); + test_correlation_coefficient(); + test_correlation_coefficient(); + test_correlation_coefficient(); + return boost::report_errors(); } From efdafa26434dc5bccea6b6d3d13bf28835436b74 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Fri, 14 Dec 2018 13:02:12 -0700 Subject: [PATCH 17/46] Add notes about beliefs about M2M4; add notes about potential improvements [CI SKIP] --- include/boost/math/tools/signal_statistics.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index 440983bd9f..8e7c0c8808 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -301,6 +301,11 @@ auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::val // If we first eliminate S, we obtain the quadratic equation: // (ka+kw-6)N^2 + 2M2(3-ka)N + ka*M2^2 - M4 = 0 =: a*N^2 + bn*N + cn = 0 // We see that if kw=3, we have a special case, and if ka+kw=6, we have a special case. + // I believe these equations are totally independent quadratics; + // if one has a complex solution it is not necessarily the case that the other must also. + // However, I can't prove that, so there is a chance that this does unnecessary work. + // Future improvements: There are algorithms which can solve quadratics much more effectively than the naive implementation found here. + // See: https://stackoverflow.com/questions/48979861/numerically-stable-method-for-solving-quadratic-equations/50065711#50065711 auto [M1, M2, M3, M4] = boost::math::tools::first_four_moments(noisy_signal); if (M4 == 0) { From cee62a833cdfd3ddc3bfb801e1cf0289880c80d6 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Sun, 16 Dec 2018 23:05:00 -0700 Subject: [PATCH 18/46] Add unit tests for Gini coefficient for uniform and exponential distribution of values. [CI SKIP] --- doc/vector_functionals/signal_statistics.qbk | 50 +++++---- .../univariate_statistics.qbk | 8 +- .../boost/math/tools/signal_statistics.hpp | 30 ++--- test/signal_statistics_test.cpp | 106 +++++++++++++----- 4 files changed, 129 insertions(+), 65 deletions(-) diff --git a/doc/vector_functionals/signal_statistics.qbk b/doc/vector_functionals/signal_statistics.qbk index 15ae154abd..c1a913d69e 100644 --- a/doc/vector_functionals/signal_statistics.qbk +++ b/doc/vector_functionals/signal_statistics.qbk @@ -46,10 +46,10 @@ namespace boost{ namespace math{ namespace tools { auto shannon_cost(ForwardIterator first, ForwardIterator last); template - auto oracle_snr(Container const & signal, Container const & noise); + auto oracle_snr(Container const & signal, Container const & noisy_signal); template - auto oracle_snr_db(Container const & signal, Container const & noise); + auto oracle_snr_db(Container const & signal, Container const & noisy_signal); template auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimate_noise_kurtosis=3); @@ -73,41 +73,53 @@ For certain operations (total variation, for example) integer inputs are support The absolute median is used in signal processing, where the median of the magnitude of the coefficients in some expansion are used to estimate noise variance. 
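One standard instance of this idea, shown purely as an illustration (the scale factor 0.6745 is the third quartile of the standard normal distribution, and `d` is a hypothetical container of finest-scale wavelet detail coefficients assumed to have zero median and Gaussian noise contamination):

    // Note: absolute_median modifies its input, so pass a copy if d is needed afterwards.
    double sigma_hat = boost::math::tools::absolute_median(d)/0.6745;
    double noise_variance_estimate = sigma_hat*sigma_hat;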
See [@https://wavelet-tour.github.io/ Mallat] for details. -The absolute median supports both real and complex arithmetic, modifies its input, and requires random access iterators. +The absolute median supports both real and complex arithmetic, modifies its input, and requires random access containers. std::vector v{-1, 1}; - double m = boost::math::tools::absolute_median(v.begin(), v.end()); + double m = boost::math::tools::absolute_median(v); // m = 1 + // Alternative syntax, using a subset of the container: + m = boost::math::tools::absolute_median(v.begin(), v.begin() + 1); [heading Absolute Gini Coefficient] The Gini coefficient, first used to measure wealth inequality, is also one of the best measures of the sparsity of an expansion in a basis. A sparse expansion has most of its norm concentrated in just a few coefficients, making the connection with wealth inequality obvious. +ee [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard] for details. However, for measuring sparsity, the phase of the numbers is irrelevant, so we provide the `absolute_gini_coefficient`: + using boost::math::tools::absolute_gini_coefficient; std::vector> v{{0,1}, {0,0}, {0,0}, {0,0}}; - double abs_gini = boost::math::tools::absolute_gini_coefficient(v.begin(), v.end()); - // now abs_gini = 1 + double abs_gini = absolute_gini_coefficient(v); + // now abs_gini = 1; maximally unequal std::vector> w{{0,1}, {1,0}, {0,-1}, {-1,0}}; - double abs_gini = boost::math::tools::absolute_gini_coefficient(w.begin(), w.end()); - // now abs_gini = 0 + abs_gini = absolute_gini_coefficient(w); + // now abs_gini = 0; every element of the vector has equal magnitude std::vector u{-1, 1, -1}; - double abs_gini = boost::math::tools::absolute_gini_coefficient(u.begin(), u.end()); + abs_gini = absolute_gini_coefficient(u); // now abs_gini = 0 + // Alternative call useful for computing over subset of the input: + abs_gini = absolute_gini_coefficient(u.begin(), u.begin() + 1); + + // If you need the population Gini coefficient: + double population_gini = (u.size() -1)*absolute_gini_coefficient(u)/u.size(); Wikipedia calls our scaling a "sample Gini coefficient". We chose this scaling because it always returns unity for a vector which has only one nonzero coefficient, whereas the value of the population Gini coefficient of a vector with one non-zero element is dependent on the length of the input. +Our scaling lacks one desirable property of the population Gini coefficient, namely that "cloning" a vector has the same Gini coefficient. +If you wish to recover the cloning property, convert to the population Gini coefficient. + If sorting the input data is too much expense for a sparsity measure (is it going to be perfect anyway?), consider calculating the Hoyer sparsity instead. [heading Hoyer Sparsity] The Hoyer sparsity measures a normalized ratio of the \u2113[super 1] and \u2113[super 2] norms. -As the name suggests, it is used to measure sparsity in an expansion in some basis. +As the name suggests, it is used to measure the sparsity of an expansion in some basis. The Hoyer sparsity computes ([radic]/N/ - \u2113[super 1](v)/\u2113[super 2](v))/([radic]N -1). For details, see [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard]. @@ -145,20 +157,18 @@ See [@https://doi.org/10.1007/978-3-642-56702-5 Ripples in Mathematics] for deta [heading Oracle Signal-to-noise ratio] -The function `oracle_snr` computes the ratio \u2016 /s/ \u2016[sub 2][super 2] / \u2016 /w/ \u2016[sub 2][super 2], where /s/ is signal and /w/ is noise. 
-The function `oracle_snr_db` computes 10`log`[sub 10](\u2016 /s/ \u2016[super 2] / \u2016 /w/ \u2016[super 2]). -In general, one does not know how to decompose a real signal /x/ into /s/ + /w/ and as such /s/ is regarded as oracle information. -Hence this function is mainly useful for unit testing other SNR measurements. +The function `oracle_snr` computes the ratio \u2016 /s/ \u2016[sub 2][super 2] / \u2016 /s/ - /x/ \u2016[sub 2][super 2], where /s/ is signal and /x/ is a noisy signal. +The function `oracle_snr_db` computes 10`log`[sub 10](\u2016 /s/ \u2016[super 2] / \u2016 /s/ - /x/ \u2016[super 2]). +The functions are so named because in general, one does not know how to decompose a real signal /x/ into /s/ + /w/ and as such /s/ is regarded as oracle information. +Hence this function is mainly useful for unit testing other SNR estimators. Usage: std::vector signal(500, 3.2); - std::vector noise(500); - // fill 'noise' with Gaussian white noise... - double snr_db = boost::math::tools::oracle_snr_db(signal, noise); - double snr = boost::math::tools::oracle_snr(signal, noise); - -The call should return the same value as [@https://www.mathworks.com/help/signal/ref/snr.html Matlab's `snr`]. + std::vector noisy_signal(500); + // fill 'noisy_signal' signal + noise + double snr_db = boost::math::tools::oracle_snr_db(signal, noisy_signal); + double snr = boost::math::tools::oracle_snr(signal, noisy_signal); The input can be real, complex, or integral. Integral inputs produce double precision floating point outputs. diff --git a/doc/vector_functionals/univariate_statistics.qbk b/doc/vector_functionals/univariate_statistics.qbk index d4d923f5ec..0cd5086ba8 100644 --- a/doc/vector_functionals/univariate_statistics.qbk +++ b/doc/vector_functionals/univariate_statistics.qbk @@ -149,13 +149,15 @@ Compute the Gini coefficient of a dataset: /Nota bene: The input data is altered-in particular, it is sorted./ /Nota bene:/ Different authors use different conventions regarding the overall scale of the Gini coefficient. -We have chosen to follow [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard's definition], which [@https://en.wikipedia.org/wiki/Gini_coefficient Wikipedia] calls a "sample Gini coefficient". -Hurley and Rickard's definition places the Gini coefficient in the range [0,1]; Wikipedia's population Gini coefficient is in the range [0, 1 - 1/ /n/]. -If you wish to convert the Boost Gini coefficient to the population Gini coefficient, multiply by (/n/-1)/ /n/. +We use [@https://en.wikipedia.org/wiki/Gini_coefficient Wikipedia's] "sample Gini coefficient". +The sample Gini coefficient lies in the range [0,1], whereas the population Gini coefficient is in the range [0, 1 - 1/ /n/]. +If you wish to convert the sample Gini coefficient returned by Boost to the population Gini coefficient, multiply by (/n/-1)/ /n/. /Nota bene:/ There is essentially no reason to pass negative values to the Gini coefficient function. However, a single use case (measuring wealth inequality when some people have negative wealth) exists, so we do not throw an exception when negative values are encountered. You should have /very/ good cause to pass negative values to the Gini coefficient calculator. +Another use case is found in signal processing, but the sorting is by magnitude and hence has a different implementation. +See `absolute_gini_coefficient` for details. 
[heading References] diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index 8e7c0c8808..a829394699 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -96,6 +96,7 @@ inline auto shannon_cost(Container const & v) template auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) { + using std::abs; using RealOrComplex = typename std::iterator_traits::value_type; BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); @@ -120,6 +121,8 @@ auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) return zero; } return ((2*num)/denom - i)/(i-2); + + } template @@ -168,10 +171,11 @@ inline auto hoyer_sparsity(Container const & v) template -auto oracle_snr(Container const & signal, Container const & noise) +auto oracle_snr(Container const & signal, Container const & noisy_signal) { using Real = typename Container::value_type; - BOOST_ASSERT_MSG(signal.size() == noise.size(), "Signal and noise must be have the same number of elements."); + BOOST_ASSERT_MSG(signal.size() == noisy_signal.size(), + "Signal and noisy_signal must be have the same number of elements."); if constexpr (std::is_integral::value) { double numerator = 0; @@ -179,7 +183,7 @@ auto oracle_snr(Container const & signal, Container const & noise) for (size_t i = 0; i < signal.size(); ++i) { numerator += signal[i]*signal[i]; - denominator += noise[i]*noise[i]; + denominator += (noisy_signal[i] - signal[i])*(noisy_signal[i] - signal[i]); } if (numerator == 0 && denominator == 0) { @@ -201,7 +205,7 @@ auto oracle_snr(Container const & signal, Container const & noise) for (size_t i = 0; i < signal.size(); ++i) { numerator += norm(signal[i]); - denominator += norm(noise[i]); + denominator += norm(noisy_signal[i] - signal[i]); } if (numerator == 0 && denominator == 0) { @@ -221,7 +225,7 @@ auto oracle_snr(Container const & signal, Container const & noise) for (size_t i = 0; i < signal.size(); ++i) { numerator += signal[i]*signal[i]; - denominator += noise[i]*noise[i]; + denominator += (signal[i] - noisy_signal[i])*(signal[i] - noisy_signal[i]); } if (numerator == 0 && denominator == 0) { @@ -237,10 +241,10 @@ auto oracle_snr(Container const & signal, Container const & noise) } template -auto mean_invariant_oracle_snr(Container const & signal, Container const & noise) +auto mean_invariant_oracle_snr(Container const & signal, Container const & noisy_signal) { using Real = typename Container::value_type; - BOOST_ASSERT_MSG(signal.size() == noise.size(), "Signal and noise must be have the same number of elements."); + BOOST_ASSERT_MSG(signal.size() == noisy_signal.size(), "Signal and noise must be have the same number of elements."); Real mean = boost::math::tools::mean(signal); Real numerator = 0; @@ -249,7 +253,7 @@ auto mean_invariant_oracle_snr(Container const & signal, Container const & noise { Real tmp = signal[i] - mean; numerator += tmp*tmp; - denominator += noise[i]*noise[i]; + denominator += (signal[i] - noisy_signal[i])*(signal[i] - noisy_signal[i]); } if (numerator == 0 && denominator == 0) { @@ -264,21 +268,20 @@ auto mean_invariant_oracle_snr(Container const & signal, Container const & noise } -// Follows the definition of SNR given in Mallat, A Wavelet Tour of Signal Processing, equation 11.16. 
template -auto mean_invariant_oracle_snr_db(Container const & signal, Container const & noise) +auto mean_invariant_oracle_snr_db(Container const & signal, Container const & noisy_signal) { using std::log10; - return 10*log10(mean_invariant_oracle_snr(signal, noise)); + return 10*log10(mean_invariant_oracle_snr(signal, noisy_signal)); } // Follows the definition of SNR given in Mallat, A Wavelet Tour of Signal Processing, equation 11.16. template -auto oracle_snr_db(Container const & signal, Container const & noise) +auto oracle_snr_db(Container const & signal, Container const & noisy_signal) { using std::log10; - return 10*log10(oracle_snr(signal, noise)); + return 10*log10(oracle_snr(signal, noisy_signal)); } // A good reference on the M2M4 estimator: @@ -300,7 +303,6 @@ auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::val // (ka+kw-6)S^2 + 2M2(3-kw)S + kw*M2^2 - M4 = 0 =: a*S^2 + bs*N + cs = 0 // If we first eliminate S, we obtain the quadratic equation: // (ka+kw-6)N^2 + 2M2(3-ka)N + ka*M2^2 - M4 = 0 =: a*N^2 + bn*N + cn = 0 - // We see that if kw=3, we have a special case, and if ka+kw=6, we have a special case. // I believe these equations are totally independent quadratics; // if one has a complex solution it is not necessarily the case that the other must also. // However, I can't prove that, so there is a chance that this does unnecessary work. diff --git a/test/signal_statistics_test.cpp b/test/signal_statistics_test.cpp index 83ea617aa6..f22f6a9c77 100644 --- a/test/signal_statistics_test.cpp +++ b/test/signal_statistics_test.cpp @@ -29,17 +29,18 @@ using boost::math::constants::two_pi; * 3) Does it work with ublas and std::array? (Checking Eigen and Armadillo will make the CI system really unhappy.) * 4) Does it work with std::forward_list if a forward iterator is all that is required? * 5) Does it work with complex data if complex data is sensible? + * 6) Does it work with integer data if sensible? */ template void test_absolute_median() { - std::mt19937 g(12); std::vector v{-1, 2, -3, 4, -5, 6, -7}; Real m = boost::math::tools::absolute_median(v.begin(), v.end()); BOOST_TEST_EQ(m, 4); + std::mt19937 g(12); std::shuffle(v.begin(), v.end(), g); m = boost::math::tools::absolute_median(v); BOOST_TEST_EQ(m, 4); @@ -77,6 +78,17 @@ void test_absolute_median() std::array w{1, 2, -3}; m = boost::math::tools::absolute_median(w); BOOST_TEST_EQ(m, 2); + + // boost.ublas vector? 
+ boost::numeric::ublas::vector u(6); + u[0] = 1; + u[1] = 2; + u[2] = -3; + u[3] = 1; + u[4] = 2; + u[5] = -3; + m = boost::math::tools::absolute_median(u); + BOOST_TEST_EQ(m, 2); } @@ -104,6 +116,12 @@ void test_complex_absolute_median() v = {{0, -1}}; m = boost::math::tools::absolute_median(v.begin(), v.end()); BOOST_TEST_EQ(m, 1); + + boost::numeric::ublas::vector w(1); + w[0] = {0, -1}; + m = boost::math::tools::absolute_median(w); + BOOST_TEST_EQ(m, 1); + } @@ -180,18 +198,19 @@ void test_complex_hoyer_sparsity() template void test_absolute_gini_coefficient() { + using boost::math::tools::absolute_gini_coefficient; Real tol = std::numeric_limits::epsilon(); std::vector v{-1,0,0}; - Real gini = boost::math::tools::absolute_gini_coefficient(v.begin(), v.end()); + Real gini = absolute_gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini - 1) < tol); - gini = boost::math::tools::absolute_gini_coefficient(v); + gini = absolute_gini_coefficient(v); BOOST_TEST(abs(gini - 1) < tol); v[0] = 1; v[1] = -1; v[2] = 1; - gini = boost::math::tools::absolute_gini_coefficient(v.begin(), v.end()); + gini = absolute_gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini) < tol); std::vector> w(128); @@ -200,10 +219,43 @@ void test_absolute_gini_coefficient() { w[k] = exp(i*static_cast(k)/static_cast(w.size())); } - gini = boost::math::tools::absolute_gini_coefficient(w.begin(), w.end()); + gini = absolute_gini_coefficient(w.begin(), w.end()); BOOST_TEST(abs(gini) < tol); - // The Gini index is invariant under "cloning": If w = v \oplus v, then G(w) = G(v). + // The population Gini index is invariant under "cloning": If w = v \oplus v, then G(w) = G(v). + // We use the sample Gini index, so we need to rescale + std::vector u(1000); + std::mt19937 gen(35); + std::uniform_real_distribution dis(0, 50); + for (size_t i = 0; i < u.size()/2; ++i) + { + u[i] = dis(gen); + } + for (size_t i = 0; i < u.size()/2; ++i) + { + u[i + u.size()/2] = u[i]; + } + std::cout << std::setprecision(std::numeric_limits::digits10 + 1); + Real scale1 = (u.size() - 2)/static_cast(u.size()); + Real scale2 = (u.size() - 1)/static_cast(u.size()); + Real population_gini1 = scale1*absolute_gini_coefficient(u.begin(), u.begin() + u.size()/2); + Real population_gini2 = scale2*absolute_gini_coefficient(u.begin(), u.end()); + + BOOST_TEST(abs(population_gini1 - population_gini2) < 10*tol); + + // The Gini coefficient of a uniform distribution is (b-a)/(3*(b+a)), see https://en.wikipedia.org/wiki/Gini_coefficient + Real expected = (dis.b() - dis.a() )/(3*(dis.a() + dis.b())); + + BOOST_TEST(abs(expected - population_gini1) < 0.01); + + std::exponential_distribution exp_dis(1); + for (size_t i = 0; i < u.size(); ++i) + { + u[i] = exp_dis(gen); + } + population_gini2 = scale2*absolute_gini_coefficient(u); + + BOOST_TEST(abs(population_gini2 - 0.5) < 0.01); } template @@ -233,11 +285,11 @@ void test_oracle_snr() Real tol = 100*std::numeric_limits::epsilon(); size_t length = 100; std::vector signal(length, 1); - std::vector noise(length, 0); + std::vector noisy_signal = signal; - noise[0] = 1; - Real snr = boost::math::tools::oracle_snr(signal, noise); - Real snr_db = boost::math::tools::oracle_snr_db(signal, noise); + noisy_signal[0] += 1; + Real snr = boost::math::tools::oracle_snr(signal, noisy_signal); + Real snr_db = boost::math::tools::oracle_snr_db(signal, noisy_signal); BOOST_TEST(abs(snr - length) < tol); BOOST_TEST(abs(snr_db - 10*log10(length)) < tol); } @@ -249,11 +301,11 @@ void test_integer_oracle_snr() double tol = 
std::numeric_limits::epsilon(); size_t length = 100; std::vector signal(length, 1); - std::vector noise(length, 0); + std::vector noisy_signal = signal; - noise[0] = 1; - double snr = boost::math::tools::oracle_snr(signal, noise); - double snr_db = boost::math::tools::oracle_snr_db(signal, noise); + noisy_signal[0] += 1; + double snr = boost::math::tools::oracle_snr(signal, noisy_signal); + double snr_db = boost::math::tools::oracle_snr_db(signal, noisy_signal); BOOST_TEST(abs(snr - length) < tol); BOOST_TEST(abs(snr_db - 10*log10(length)) < tol); } @@ -267,11 +319,11 @@ void test_complex_oracle_snr() Real tol = 100*std::numeric_limits::epsilon(); size_t length = 100; std::vector signal(length, {1,0}); - std::vector noise(length, {0,0}); + std::vector noisy_signal = signal; - noise[0] = {1,0}; - Real snr = boost::math::tools::oracle_snr(signal, noise); - Real snr_db = boost::math::tools::oracle_snr_db(signal, noise); + noisy_signal[0] += Complex(1,0); + Real snr = boost::math::tools::oracle_snr(signal, noisy_signal); + Real snr_db = boost::math::tools::oracle_snr_db(signal, noisy_signal); BOOST_TEST(abs(snr - length) < tol); BOOST_TEST(abs(snr_db - 10*log10(length)) < tol); } @@ -280,31 +332,29 @@ template void test_m2m4_snr_estimator() { std::vector signal(5000, 1); - std::vector noise(signal.size()); std::vector x(signal.size()); std::mt19937 gen(18); std::normal_distribution dis{0, 1.0}; - for (size_t i = 0; i < noise.size(); ++i) { - signal[i] = 5*sin(100*6.28*i/noise.size()); - noise[i] = dis(gen); - x[i] = signal[i] + noise[i]; + for (size_t i = 0; i < x.size(); ++i) { + signal[i] = 5*sin(100*6.28*i/x.size()); + x[i] = signal[i] + dis(gen); } + // Kurtosis of a sine wave is 1.5: auto m2m4_db = boost::math::tools::m2m4_snr_estimator_db(x, 1.5); - auto oracle_snr_db = boost::math::tools::mean_invariant_oracle_snr_db(signal, noise); + auto oracle_snr_db = boost::math::tools::mean_invariant_oracle_snr_db(signal, x); BOOST_TEST(abs(m2m4_db - oracle_snr_db) < 0.2); std::uniform_real_distribution uni_dis{-1,1}; - for (size_t i = 0; i < noise.size(); ++i) + for (size_t i = 0; i < x.size(); ++i) { - noise[i] = uni_dis(gen); - x[i] = signal[i] + noise[i]; + x[i] = signal[i] + uni_dis(gen); } // Kurtosis of continuous uniform distribution over [-1,1] is 1.8: m2m4_db = boost::math::tools::m2m4_snr_estimator_db(x, 1.5, 1.8); - oracle_snr_db = boost::math::tools::mean_invariant_oracle_snr_db(signal, noise); + oracle_snr_db = boost::math::tools::mean_invariant_oracle_snr_db(signal, x); BOOST_TEST(abs(m2m4_db - oracle_snr_db) < 0.2); } From 4d85b85a5c1572fce7d0cd640af223b4adc07135 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Mon, 17 Dec 2018 00:18:16 -0700 Subject: [PATCH 19/46] Do not allow computation of sparsity of a vector with a single element. [CI SKIP] --- doc/vector_functionals/signal_statistics.qbk | 7 +++--- .../boost/math/tools/signal_statistics.hpp | 4 ++-- test/signal_statistics_test.cpp | 23 +++++++++++++++---- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/doc/vector_functionals/signal_statistics.qbk b/doc/vector_functionals/signal_statistics.qbk index c1a913d69e..831519ca6b 100644 --- a/doc/vector_functionals/signal_statistics.qbk +++ b/doc/vector_functionals/signal_statistics.qbk @@ -122,7 +122,7 @@ The Hoyer sparsity measures a normalized ratio of the \u2113[super 1] and \u2113 As the name suggests, it is used to measure the sparsity of an expansion in some basis. The Hoyer sparsity computes ([radic]/N/ - \u2113[super 1](v)/\u2113[super 2](v))/([radic]N -1). 
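As a quick sanity check on this formula, consider /N/ i.i.d. samples uniformly distributed on [0, /b/], for which the expected l1 norm is about Nb/2 and the expected l2 norm is about b*sqrt(N/3); in LaTeX notation,

\[
\frac{\sqrt{N} - \dfrac{Nb/2}{b\sqrt{N/3}}}{\sqrt{N} - 1} = \frac{1 - \sqrt{3}/2}{1 - 1/\sqrt{N}} \longrightarrow 1 - \frac{\sqrt{3}}{2} \approx 0.134 \quad (N \to \infty),
\]

which is the expected value used in the uniform-distribution unit test for `hoyer_sparsity`.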
-For details, see [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard]. +For details, see [@http://www.jmlr.org/papers/volume5/hoyer04a/hoyer04a.pdf Hoyer] as well as [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard]. Usage: @@ -134,7 +134,8 @@ Usage: // hs = 0 The container must be forward iterable and the contents are not modified. -Accepts real, complex, and integer inputs. If the input is an integral type, the output is a double precision float. +Accepts real, complex, and integer inputs. +If the input is an integral type, the output is a double precision float. [heading Shannon Entropy] @@ -233,7 +234,7 @@ Then the method has no mechanism for distinguishing the signal from the noise, a * Hurley, Niall, and Scott Rickard. ['Comparing measures of sparsity.] IEEE Transactions on Information Theory 55.10 (2009): 4723-4741. * Jensen, Arne, and Anders la Cour-Harbo. ['Ripples in mathematics: the discrete wavelet transform.] Springer Science & Business Media, 2001. * D. R. Pauluzzi and N. C. Beaulieu, ['A comparison of SNR estimation techniques for the AWGN channel,] IEEE Trans. Communications, Vol. 48, No. 10, pp. 1681-1691, 2000. - +* Hoyer, Patrik O. ['Non-negative matrix factorization with sparseness constraints.], Journal of machine learning research 5.Nov (2004): 1457-1469. [endsect] [/section:signal_statistics Signal Statistics] diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index a829394699..7b9d246abf 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -139,7 +139,7 @@ auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) using RealIntOrComplex = typename std::iterator_traits::value_type; using std::abs; using std::sqrt; - BOOST_ASSERT_MSG(first != last, "Computation of the Hoyer sparsity requires at least one sample."); + BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Hoyer sparsity requires at least two samples."); decltype(abs(*first)) l1 = 0; decltype(abs(*first)) l2 = 0; @@ -244,7 +244,7 @@ template auto mean_invariant_oracle_snr(Container const & signal, Container const & noisy_signal) { using Real = typename Container::value_type; - BOOST_ASSERT_MSG(signal.size() == noisy_signal.size(), "Signal and noise must be have the same number of elements."); + BOOST_ASSERT_MSG(signal.size() == noisy_signal.size(), "Signal and noisy signal must be have the same number of elements."); Real mean = boost::math::tools::mean(signal); Real numerator = 0; diff --git a/test/signal_statistics_test.cpp b/test/signal_statistics_test.cpp index f22f6a9c77..53312998b1 100644 --- a/test/signal_statistics_test.cpp +++ b/test/signal_statistics_test.cpp @@ -150,6 +150,20 @@ void test_hoyer_sparsity() std::array w{1,1,1}; hs = boost::math::tools::hoyer_sparsity(w); BOOST_TEST(abs(hs) < tol); + + // Now some statistics: + // If x_i ~ Unif(0,1), E[x_i] = 1/2, E[x_i^2] = 1/3. 
+ // Therefore, E[||x||_1] = N/2, E[||x||_2] = sqrt(N/3), + // and hoyer_sparsity(x) = (1-sqrt(3)/2)/(1-1/sqrt(N)) + std::mt19937 gen(82); + std::uniform_real_distribution dis(0, 1); + v.resize(5000); + for (size_t i = 0; i < v.size(); ++i) { + v[i] = dis(gen); + } + hs = boost::math::tools::hoyer_sparsity(v); + Real expected = (1.0 - boost::math::constants::root_three()/2)/(1.0 - 1.0/sqrt(v.size())); + BOOST_TEST(abs(expected - hs) < 0.01); } template @@ -355,6 +369,7 @@ void test_m2m4_snr_estimator() // Kurtosis of continuous uniform distribution over [-1,1] is 1.8: m2m4_db = boost::math::tools::m2m4_snr_estimator_db(x, 1.5, 1.8); oracle_snr_db = boost::math::tools::mean_invariant_oracle_snr_db(signal, x); + // The performance depends on the exact numbers generated by the distribution, but this isn't bad: BOOST_TEST(abs(m2m4_db - oracle_snr_db) < 0.2); } @@ -382,15 +397,15 @@ int main() test_integer_hoyer_sparsity(); - test_shannon_entropy(); - test_shannon_entropy(); - test_shannon_entropy(); - test_complex_hoyer_sparsity>(); test_complex_hoyer_sparsity>(); test_complex_hoyer_sparsity>(); test_complex_hoyer_sparsity(); + test_shannon_entropy(); + test_shannon_entropy(); + test_shannon_entropy(); + test_oracle_snr(); test_oracle_snr(); test_oracle_snr(); From 1e3e5474271a3f95011fe1e33acb99509fba85a8 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Mon, 17 Dec 2018 11:02:24 -0700 Subject: [PATCH 20/46] Apply fine-tooth comb to Hoyer sparsity. [CI SKIP] --- doc/vector_functionals/signal_statistics.qbk | 6 ++++++ test/signal_statistics_test.cpp | 16 +++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/doc/vector_functionals/signal_statistics.qbk b/doc/vector_functionals/signal_statistics.qbk index 831519ca6b..87aa0cc44f 100644 --- a/doc/vector_functionals/signal_statistics.qbk +++ b/doc/vector_functionals/signal_statistics.qbk @@ -124,6 +124,12 @@ As the name suggests, it is used to measure the sparsity of an expansion in some The Hoyer sparsity computes ([radic]/N/ - \u2113[super 1](v)/\u2113[super 2](v))/([radic]N -1). For details, see [@http://www.jmlr.org/papers/volume5/hoyer04a/hoyer04a.pdf Hoyer] as well as [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard]. +A few special cases will serve to clarify the intended use: +If /v/ has only one nonzero coefficient, the Hoyer sparsity attains its maxima of 1. +If the coefficients of /v/ all have the same magnitude, then the Hoyer sparsity attains its minima of zero. +If the elements of /v/ are uniformly distributed on an interval [0, /b/], then the Hoyer sparsity is approximately 0.133. + + Usage: std::vector v{1,0,0}; diff --git a/test/signal_statistics_test.cpp b/test/signal_statistics_test.cpp index 53312998b1..b6ea541e31 100644 --- a/test/signal_statistics_test.cpp +++ b/test/signal_statistics_test.cpp @@ -154,7 +154,7 @@ void test_hoyer_sparsity() // Now some statistics: // If x_i ~ Unif(0,1), E[x_i] = 1/2, E[x_i^2] = 1/3. // Therefore, E[||x||_1] = N/2, E[||x||_2] = sqrt(N/3), - // and hoyer_sparsity(x) = (1-sqrt(3)/2)/(1-1/sqrt(N)) + // and hoyer_sparsity(x) is close to (1-sqrt(3)/2)/(1-1/sqrt(N)) std::mt19937 gen(82); std::uniform_real_distribution dis(0, 1); v.resize(5000); @@ -164,6 +164,20 @@ void test_hoyer_sparsity() hs = boost::math::tools::hoyer_sparsity(v); Real expected = (1.0 - boost::math::constants::root_three()/2)/(1.0 - 1.0/sqrt(v.size())); BOOST_TEST(abs(expected - hs) < 0.01); + + // Does it work with a forward list? 
+ std::forward_list u1{1, 1, 1}; + hs = boost::math::tools::hoyer_sparsity(u1); + BOOST_TEST(abs(hs) < tol); + + // Does it work with a boost ublas vector? + boost::numeric::ublas::vector u2(3); + u2[0] = 1; + u2[1] = 1; + u2[2] = 1; + hs = boost::math::tools::hoyer_sparsity(u2); + BOOST_TEST(abs(hs) < tol); + } template From b93acf94b2a4a06fd8fb6a8245fb267ad31ca660 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Mon, 17 Dec 2018 11:45:38 -0700 Subject: [PATCH 21/46] Do not add Shannon entropy and Shannon cost until I have an use for them; without a use, serious design errors are easily made. [CI SKIP] --- doc/vector_functionals/signal_statistics.qbk | 30 ------------ .../boost/math/tools/signal_statistics.hpp | 48 ------------------- test/signal_statistics_test.cpp | 23 --------- 3 files changed, 101 deletions(-) diff --git a/doc/vector_functionals/signal_statistics.qbk b/doc/vector_functionals/signal_statistics.qbk index 87aa0cc44f..4fba934ecd 100644 --- a/doc/vector_functionals/signal_statistics.qbk +++ b/doc/vector_functionals/signal_statistics.qbk @@ -33,18 +33,6 @@ namespace boost{ namespace math{ namespace tools { template auto hoyer_sparsity(ForwardIterator first, ForwardIterator last); - template - auto shannon_entropy(Container const & c); - - template - auto shannon_entropy(ForwardIterator first, ForwardIterator last); - - template - auto shannon_cost(Container const & c); - - template - auto shannon_cost(ForwardIterator first, ForwardIterator last); - template auto oracle_snr(Container const & signal, Container const & noisy_signal); @@ -143,24 +131,6 @@ The container must be forward iterable and the contents are not modified. Accepts real, complex, and integer inputs. If the input is an integral type, the output is a double precision float. -[heading Shannon Entropy] - - std::vector v{1/2.0, 1/2.0}; - double Hs = boost::math::tools::shannon_entropy(v.begin(), v.end()); - // Hs = ln(2). - -The Shannon entropy only supports non-negative real-valued inputs, presumably for interpretational purposes in the range [0,1]-though this is not enforced. -The natural logarithm is used to compute the Shannon entropy; all other "Shannon entropies" are readily obtained by change of log base. - -[heading Shannon Cost] - - std::vector v{-1, 1,-1}; - double Ks = boost::math::tools::shannon_cost(v.begin(), v.end()); - // Ks = 0; concentration of the vector is minimized. - -The Shannon cost is a modified version of the Shannon entropy used in signal processing and data compression. -The useful properties of the Shannon cost are /K/[sub /s/](0) = 0 and /K/[sub /s/](/v/\u2295 /w/) = /K/[sub /s/](v) + /K/[sub /s/](w). -See [@https://doi.org/10.1007/978-3-642-56702-5 Ripples in Mathematics] for details. 
[heading Oracle Signal-to-noise ratio] diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index 7b9d246abf..8eae5fb35c 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -45,54 +45,6 @@ inline auto absolute_median(RandomAccessContainer & v) return absolute_median(v.begin(), v.end()); } - -template -auto shannon_entropy(ForwardIterator first, ForwardIterator last) -{ - using Real = typename std::iterator_traits::value_type; - using std::log; - Real entropy = 0; - for (auto it = first; it != last; ++it) - { - if (*it != 0) - { - entropy += (*it)*log(*it); - } - } - return -entropy; -} - -template -inline auto shannon_entropy(Container const & v) -{ - return shannon_entropy(v.cbegin(), v.cend()); -} - - -template -auto shannon_cost(ForwardIterator first, ForwardIterator last) -{ - using Real = typename std::iterator_traits::value_type; - using std::log; - Real cost = 0; - for (auto it = first; it != last; ++it) - { - if (*it != 0) - { - Real tmp = abs(*it); - cost += tmp*tmp*log(tmp*tmp); - } - } - return -cost; -} - -template -inline auto shannon_cost(Container const & v) -{ - return shannon_cost(v.cbegin(), v.cend()); -} - - template auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) { diff --git a/test/signal_statistics_test.cpp b/test/signal_statistics_test.cpp index b6ea541e31..7fe3fa04d3 100644 --- a/test/signal_statistics_test.cpp +++ b/test/signal_statistics_test.cpp @@ -286,25 +286,6 @@ void test_absolute_gini_coefficient() BOOST_TEST(abs(population_gini2 - 0.5) < 0.01); } -template -void test_shannon_entropy() -{ - Real tol = 100*std::numeric_limits::epsilon(); - using boost::math::constants::half; - using boost::math::constants::ln_two; - std::vector v(30, half()); - Real Hs = boost::math::tools::shannon_entropy(v.begin(), v.end()); - Real expected = v.size()*ln_two()/2; - BOOST_TEST(abs(Hs - expected) < tol*expected); - - Hs = boost::math::tools::shannon_entropy(v); - BOOST_TEST(abs(Hs - expected) < tol*expected); - - std::array w{half(), half(), half()}; - Hs = boost::math::tools::shannon_entropy(w); - expected = 3*ln_two()/2; - BOOST_TEST(abs(Hs - expected) < tol*expected); -} template void test_oracle_snr() @@ -416,10 +397,6 @@ int main() test_complex_hoyer_sparsity>(); test_complex_hoyer_sparsity(); - test_shannon_entropy(); - test_shannon_entropy(); - test_shannon_entropy(); - test_oracle_snr(); test_oracle_snr(); test_oracle_snr(); From d5099576420575459c174f34037e94a93e989c68 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Mon, 17 Dec 2018 14:39:30 -0700 Subject: [PATCH 22/46] Instead of using 'population_stat' in calls, simply call 'stat', and provide 'sample_stat' when an unbiased estimator of stat exists. 
[CI SKIP] --- .../bivariate_statistics.qbk | 20 +- .../univariate_statistics.qbk | 103 ++++-- .../boost/math/tools/bivariate_statistics.hpp | 8 +- .../math/tools/univariate_statistics.hpp | 216 ++++++------ test/bivariate_statistics_test.cpp | 33 +- test/univariate_statistics_test.cpp | 326 ++++++++++++------ 6 files changed, 441 insertions(+), 265 deletions(-) diff --git a/doc/vector_functionals/bivariate_statistics.qbk b/doc/vector_functionals/bivariate_statistics.qbk index 6f53e96972..23b4a9ecff 100644 --- a/doc/vector_functionals/bivariate_statistics.qbk +++ b/doc/vector_functionals/bivariate_statistics.qbk @@ -16,10 +16,10 @@ namespace boost{ namespace math{ namespace tools { template - auto population_covariance(Container const & u, Container const & v); + auto covariance(Container const & u, Container const & v); template - auto means_and_population_covariance(Container const & u, Container const & v); + auto means_and_covariance(Container const & u, Container const & v); template auto correlation_coefficient(Container const & u, Container const & v); @@ -31,23 +31,25 @@ namespace boost{ namespace math{ namespace tools { This file provides functions for computing bivariate statistics. -[heading Population Covariance] +[heading Covariance] + +Computes the population covariance of two datasets: std::vector u{1,2,3,4,5}; std::vector v{1,2,3,4,5}; - double cov_uv = boost::math::tools::population_covariance(u, v); + double cov_uv = boost::math::tools::covariance(u, v); The implementation follows [@https://doi.org/10.1109/CLUSTR.2009.5289161 Bennet et al]. -The data is not modified and must be forward iterable. +The data is not modified. Requires a random-access container. Works with real-valued inputs and does not work with complex-valued inputs. The algorithm used herein simultaneously generates the mean values of the input data /u/ and /v/. -For certain applications, it might be useful to get them in a single pass. -As such, we provide `means_and_population_covariance`: +For certain applications, it might be useful to get them in a single pass through the data. +As such, we provide `means_and_covariance`: std::vector u{1,2,3,4,5}; std::vector v{1,2,3,4,5}; - auto [mu_u, mu_v, cov_uv] = boost::math::tools::means_and_population_covariance(u, v); + auto [mu_u, mu_v, cov_uv] = boost::math::tools::means_and_covariance(u, v); [heading Correlation Coefficient] @@ -58,7 +60,7 @@ Computes the [@https://en.wikipedia.org/wiki/Pearson_correlation_coefficient Pea double rho_uv = boost::math::tools::correlation_coefficient(u, v); // rho_uv = 1. -The data must be forward iterable and cannot be complex. +The data must be random access and cannot be complex. If one or both of the datasets is constant, the correlation coefficient is an indeterminant form (0/0) and definitions must be introduced to assign it a value. We use the following: If both datasets are constant, then the correlation coefficient is 1. 
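The population and sample variances differ only by Bessel's correction, so (assuming `sample_variance` is the usual unbiased estimator, as the commit message indicates) either can be recovered from the other; an illustrative sketch:

    std::vector<double> v{1,2,3,4,5};
    double sigma_sq = boost::math::tools::variance(v);    // divides by n
    double sn_sq = sigma_sq*v.size()/(v.size() - 1.0);    // should agree with boost::math::tools::sample_variance(v)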
diff --git a/doc/vector_functionals/univariate_statistics.qbk b/doc/vector_functionals/univariate_statistics.qbk index 0cd5086ba8..7e345fb2c8 100644 --- a/doc/vector_functionals/univariate_statistics.qbk +++ b/doc/vector_functionals/univariate_statistics.qbk @@ -22,22 +22,40 @@ namespace boost{ namespace math{ namespace tools { auto mean(ForwardIterator first, ForwardIterator last); template - auto mean_and_population_variance(Container const & c); + auto variance(Container const & c); template - auto mean_and_population_variance(ForwardIterator first, ForwardIterator last); + auto variance(ForwardIterator first, ForwardIterator last); template - auto median(Container & c); + auto sample_variance(Container const & c); template - auto median(ForwardIterator first, ForwardIterator last); + auto sample_variance(ForwardIterator first, ForwardIterator last); + + template + auto skewness(Container const & c); + + template + auto skewness(ForwardIterator first, ForwardIterator last); + + template + auto kurtosis(Container const & c); + + template + auto kurtosis(ForwardIterator first, ForwardIterator last); + + template + auto first_four_moments(Container const & c); + + template + auto first_four_moments(ForwardIterator first, ForwardIterator last); template - auto population_skewness(Container const & c); + auto median(Container & c); template - auto population_skewness(ForwardIterator first, ForwardIterator last); + auto median(ForwardIterator first, ForwardIterator last); template auto gini_coefficient(Container & c); @@ -45,6 +63,12 @@ namespace boost{ namespace math{ namespace tools { template auto gini_coefficient(ForwardIterator first, ForwardIterator last); + template + auto sample_gini_coefficient(Container & c); + + template + auto sample_gini_coefficient(ForwardIterator first, ForwardIterator last); + }}} `` @@ -70,54 +94,47 @@ For certain operations (total variation, for example) integer inputs are support std::vector v{1,2,3,4,5}; double mu = boost::math::tools::mean(v.cbegin(), v.cend()); // Alternative syntax if you want to use entire container: - double mu = boost::math::tools::mean(v); + mu = boost::math::tools::mean(v); The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6a]. The data is not modified and must be forward iterable. -Works with real, complex and integer data. +Works with real and integer data. If the input is an integer type, the output is a double precision float. -[heading Mean and Population Variance] +[heading Variance] std::vector v{1,2,3,4,5}; - auto [mu, s] = boost::math::tools::mean_and_population_variance(v.cbegin(), v.cend()); + Real sigma_sq = boost::math::tools::variance(v.cbegin(), v.cend()); If you don't need to calculate on a subset of the input, then the range call is more terse: std::vector v{1,2,3,4,5}; - auto [mu, s] = boost::math::tools::mean_and_population_variance(v); + Real sigma_sq = boost::math::tools::variance(v); The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6b]. -Note that we do not provide computation of population variance alone; -we are unaware of any one-pass, numerically stable computation of population variance which does not simultaneously generate the mean. -If the mean is not required, simply ignore it. -The input datatype must be forward iterable and the range `[first, last)` must contain at least two elements. +The input data must be forward iterable and the range `[first, last)` must contain at least two elements. 
It is /not/ in general sensible to pass complex numbers to this routine. If integers are passed as input, then the output is a double precision float. -[heading Median] - -Compute the median of a dataset: +`boost::math::tools::variance` returns the population variance. +If you want a sample variance, use std::vector v{1,2,3,4,5}; - double m = boost::math::tools::median(v.begin(), v.end()); + Real sn_sq = boost::math::tools::sample_variance(v); -/Nota bene: The input vector is modified./ -The calculation of the median is a thin wrapper around the C++11 [@https://en.cppreference.com/w/cpp/algorithm/nth_element `nth_element`]. -Therefore, all requirements of `std::nth_element` are inherited by the median calculation. [heading Skewness] Computes the skewness of a dataset: std::vector v{1,2,3,4,5}; - double skewness = boost::math::tools::population_skewness(v); + double skewness = boost::math::tools::skewness(v); // skewness = 0. The input vector is not modified, works with integral and real data. If the input data is integral, the output is a double precision float. -For a dataset consisting of a constant value, we return zero as the skewness. +For a dataset consisting of a single constant value, we take the skewness to be zero by definition. The implementation follows [@https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf Pebay]. @@ -126,7 +143,7 @@ The implementation follows [@https://prod.sandia.gov/techlib-noauth/access-contr Computes the kurtosis of a dataset: std::vector v{1,2,3,4,5}; - double kurtosis = boost::math::tools::population_kurtosis(v); + double kurtosis = boost::math::tools::kurtosis(v); // kurtosis = 17/10 The implementation follows [@https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf Pebay]. @@ -135,26 +152,46 @@ If the input data is integral, the output is a double precision float. Note that this is /not/ the excess kurtosis. If you require the excess kurtosis, subtract 3 from the kurtosis. +[heading First four moments] + +Simultaneously computes the first four [@https://en.wikipedia.org/wiki/Central_moment central moments] in a single pass through the data: + + std::vector v{1,2,3,4,5}; + auto [M1, M2, M3, M4] = boost::math::tools::first_four_moments(v); + + +[heading Median] + +Compute the median of a dataset: + + std::vector v{1,2,3,4,5}; + double m = boost::math::tools::median(v.begin(), v.end()); + +/Nota bene: The input vector is modified./ +The calculation of the median is a thin wrapper around the C++11 [@https://en.cppreference.com/w/cpp/algorithm/nth_element `nth_element`]. +Therefore, all requirements of `std::nth_element` are inherited by the median calculation. +In particular, the container must allow random access. + + [heading Gini Coefficient] Compute the Gini coefficient of a dataset: std::vector v{1,0,0,0}; - double gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); - // gini = 1, as v[0] holds all the "wealth" + double gini = boost::math::tools::gini_coefficient(v); + // gini = 3/4 + double s_gini = boost::math::tools::sample_gini_coefficient(v); + // s_gini = 1. std::vector w{1,1,1,1}; gini = boost::math::tools::gini_coefficient(w.begin(), w.end()); // gini = 0, as all elements are now equal. -/Nota bene: The input data is altered-in particular, it is sorted./ +/Nota bene: The input data is altered-in particular, it is sorted. 
Makes a call to `std::sort`, and as such requires random access iterators./ -/Nota bene:/ Different authors use different conventions regarding the overall scale of the Gini coefficient. -We use [@https://en.wikipedia.org/wiki/Gini_coefficient Wikipedia's] "sample Gini coefficient". -The sample Gini coefficient lies in the range [0,1], whereas the population Gini coefficient is in the range [0, 1 - 1/ /n/]. -If you wish to convert the sample Gini coefficient returned by Boost to the population Gini coefficient, multiply by (/n/-1)/ /n/. +The sample Gini coefficient lies in the range [0,1], whereas the population Gini coefficient is in the range [0, 1 - 1/ /n/]. /Nota bene:/ There is essentially no reason to pass negative values to the Gini coefficient function. -However, a single use case (measuring wealth inequality when some people have negative wealth) exists, so we do not throw an exception when negative values are encountered. +However, a use case (measuring wealth inequality when some people have negative wealth) exists, so we do not throw an exception when negative values are encountered. You should have /very/ good cause to pass negative values to the Gini coefficient calculator. Another use case is found in signal processing, but the sorting is by magnitude and hence has a different implementation. See `absolute_gini_coefficient` for details. diff --git a/include/boost/math/tools/bivariate_statistics.hpp b/include/boost/math/tools/bivariate_statistics.hpp index dae162b7f3..08d56276c9 100644 --- a/include/boost/math/tools/bivariate_statistics.hpp +++ b/include/boost/math/tools/bivariate_statistics.hpp @@ -16,8 +16,7 @@ namespace boost{ namespace math{ namespace tools { template -auto -means_and_population_covariance(Container const & u, Container const & v) +auto means_and_covariance(Container const & u, Container const & v) { using Real = typename Container::value_type; using std::size; @@ -42,10 +41,9 @@ means_and_population_covariance(Container const & u, Container const & v) } template -auto -population_covariance(Container const & u, Container const & v) +auto covariance(Container const & u, Container const & v) { - auto [mu_u, mu_v, cov] = boost::math::tools::means_and_population_covariance(u, v); + auto [mu_u, mu_v, cov] = boost::math::tools::means_and_covariance(u, v); return cov; } diff --git a/include/boost/math/tools/univariate_statistics.hpp b/include/boost/math/tools/univariate_statistics.hpp index ad9158d0cb..ca3411c431 100644 --- a/include/boost/math/tools/univariate_statistics.hpp +++ b/include/boost/math/tools/univariate_statistics.hpp @@ -16,8 +16,7 @@ namespace boost{ namespace math{ namespace tools { template -auto -mean(ForwardIterator first, ForwardIterator last) +auto mean(ForwardIterator first, ForwardIterator last) { using Real = typename std::iterator_traits::value_type; BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean."); @@ -50,8 +49,7 @@ inline auto mean(Container const & v) } template -auto -mean_and_population_variance(ForwardIterator first, ForwardIterator last) +auto variance(ForwardIterator first, ForwardIterator last) { using Real = typename std::iterator_traits::value_type; BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance."); @@ -61,104 +59,56 @@ mean_and_population_variance(ForwardIterator first, ForwardIterator last) double M = *first; double Q = 0; double k = 2; - for (auto it = first + 1; it != last; ++it) + for (auto it = std::next(first); it != last; ++it) { 
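            // One-pass update following Higham, eq. 1.6b: tmp is the deviation from the
            // previous running mean, M += tmp/k advances the mean, and Q += (k-1)*tmp*tmp/k
            // accumulates the sum of squared deviations. After the loop k == n+1, so the
            // returned Q/(k-1) is Q/n, the population variance.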
double tmp = *it - M; Q = Q + ((k-1)*tmp*tmp)/k; M = M + tmp/k; k += 1; } - return std::make_pair(M, Q/(k-1)); + return Q/(k-1); } else { Real M = *first; Real Q = 0; Real k = 2; - for (auto it = first + 1; it != last; ++it) + for (auto it = std::next(first); it != last; ++it) { Real tmp = *it - M; Q = Q + ((k-1)*tmp*tmp)/k; M = M + tmp/k; k += 1; } - - return std::make_pair(M, Q/(k-1)); + return Q/(k-1); } } template -inline auto mean_and_population_variance(Container const & v) +inline auto variance(Container const & v) { - return mean_and_population_variance(v.cbegin(), v.cend()); + return variance(v.cbegin(), v.cend()); } -template -auto median(RandomAccessIterator first, RandomAccessIterator last) -{ - size_t num_elems = std::distance(first, last); - BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined."); - if (num_elems & 1) - { - auto middle = first + (num_elems - 1)/2; - std::nth_element(first, middle, last); - return *middle; - } - else - { - auto middle = first + num_elems/2 - 1; - std::nth_element(first, middle, last); - std::nth_element(middle, middle+1, last); - return (*middle + *(middle+1))/2; - } -} - - -template -inline auto median(RandomAccessContainer & v) -{ - return median(v.begin(), v.end()); -} - - template -auto gini_coefficient(ForwardIterator first, ForwardIterator last) +auto sample_variance(ForwardIterator first, ForwardIterator last) { - using Real = typename std::iterator_traits::value_type; - BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); - - std::sort(first, last); - - Real i = 1; - Real num = 0; - Real denom = 0; - for (auto it = first; it != last; ++it) - { - num += *it*i; - denom += *it; - ++i; - } - - // If the l1 norm is zero, all elements are zero, so every element is the same. - if (denom == 0) - { - return Real(0); - } - - return ((2*num)/denom - i)/(i-2); + size_t n = std::distance(first, last); + BOOST_ASSERT_MSG(n > 1, "At least two samples are required to compute the sample variance."); + return n*variance(first, last)/(n-1); } -template -inline auto gini_coefficient(RandomAccessContainer & v) +template +inline auto sample_variance(Container const & v) { - return gini_coefficient(v.begin(), v.end()); + return sample_variance(v.cbegin(), v.cend()); } + // Follows equation 1.5 of: // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf template -auto -population_skewness(ForwardIterator first, ForwardIterator last) +auto skewness(ForwardIterator first, ForwardIterator last) { using Real = typename std::iterator_traits::value_type; BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute skewness."); @@ -168,7 +118,7 @@ population_skewness(ForwardIterator first, ForwardIterator last) double M2 = 0; double M3 = 0; double n = 2; - for (auto it = first + 1; it != last; ++it) + for (auto it = std::next(first); it != last; ++it) { double delta21 = *it - M1; double tmp = delta21/n; @@ -178,15 +128,15 @@ population_skewness(ForwardIterator first, ForwardIterator last) n += 1; } - double variance = M2/(n-1); - if (variance == 0) + double var = M2/(n-1); + if (var == 0) { // The limit is technically undefined, but the interpretation here is clear: // A constant dataset has no skewness. 
return double(0); } - double skewness = M3/((n-1)*variance*sqrt(variance)); - return skewness; + double skew = M3/(M2*sqrt(var)); + return skew; } else { @@ -194,7 +144,7 @@ population_skewness(ForwardIterator first, ForwardIterator last) Real M2 = 0; Real M3 = 0; Real n = 2; - for (auto it = first + 1; it != last; ++it) + for (auto it = std::next(first); it != last; ++it) { Real delta21 = *it - M1; Real tmp = delta21/n; @@ -204,29 +154,28 @@ population_skewness(ForwardIterator first, ForwardIterator last) n += 1; } - Real variance = M2/(n-1); - if (variance == 0) + Real var = M2/(n-1); + if (var == 0) { // The limit is technically undefined, but the interpretation here is clear: // A constant dataset has no skewness. return Real(0); } - Real skewness = M3/((n-1)*variance*sqrt(variance)); - return skewness; + Real skew = M3/(M2*sqrt(var)); + return skew; } } template -inline auto population_skewness(Container const & v) +inline auto skewness(Container const & v) { - return population_skewness(v.cbegin(), v.cend()); + return skewness(v.cbegin(), v.cend()); } // Follows equation 1.6 of: // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf template -auto -population_kurtosis(ForwardIterator first, ForwardIterator last) +auto kurtosis(ForwardIterator first, ForwardIterator last) { using Real = typename std::iterator_traits::value_type; BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute kurtosis."); @@ -237,7 +186,7 @@ population_kurtosis(ForwardIterator first, ForwardIterator last) double M3 = 0; double M4 = 0; double n = 2; - for (auto it = first + 1; it != last; ++it) + for (auto it = std::next(first); it != last; ++it) { double delta21 = *it - M1; double tmp = delta21/n; @@ -248,13 +197,13 @@ population_kurtosis(ForwardIterator first, ForwardIterator last) n += 1; } - double variance = M2/(n-1); - if (variance == 0) + double var = M2/(n-1); + if (var == 0) { return double(0); } - double kurtosis = M4/((n-1)*variance*variance); - return kurtosis; + double kurt = M4/((n-1)*var*var); + return kurt; } else { @@ -263,7 +212,7 @@ population_kurtosis(ForwardIterator first, ForwardIterator last) Real M3 = 0; Real M4 = 0; Real n = 2; - for (auto it = first + 1; it != last; ++it) + for (auto it = std::next(first); it != last; ++it) { Real delta21 = *it - M1; Real tmp = delta21/n; @@ -274,30 +223,29 @@ population_kurtosis(ForwardIterator first, ForwardIterator last) n += 1; } - Real variance = M2/(n-1); - if (variance == 0) + Real var = M2/(n-1); + if (var == 0) { // Again, the limit is technically undefined, but the interpretation here is clear: // A constant dataset has no kurtosis. 
return Real(0); } - Real kurtosis = M4/((n-1)*variance*variance); - return kurtosis; + Real kurt = M4/((n-1)*var*var); + return kurt; } } template -inline auto population_kurtosis(Container const & v) +inline auto kurtosis(Container const & v) { - return population_kurtosis(v.cbegin(), v.cend()); + return kurtosis(v.cbegin(), v.cend()); } // Follows equation 1.5/1.6 of: // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf template -auto -first_four_moments(ForwardIterator first, ForwardIterator last) +auto first_four_moments(ForwardIterator first, ForwardIterator last) { using Real = typename std::iterator_traits::value_type; BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the first four moments."); @@ -308,7 +256,7 @@ first_four_moments(ForwardIterator first, ForwardIterator last) double M3 = 0; double M4 = 0; double n = 2; - for (auto it = first + 1; it != last; ++it) + for (auto it = std::next(first); it != last; ++it) { double delta21 = *it - M1; double tmp = delta21/n; @@ -328,7 +276,7 @@ first_four_moments(ForwardIterator first, ForwardIterator last) Real M3 = 0; Real M4 = 0; Real n = 2; - for (auto it = first + 1; it != last; ++it) + for (auto it = std::next(first); it != last; ++it) { Real delta21 = *it - M1; Real tmp = delta21/n; @@ -349,6 +297,80 @@ inline auto first_four_moments(Container const & v) return first_four_moments(v.cbegin(), v.cend()); } +template +auto median(RandomAccessIterator first, RandomAccessIterator last) +{ + size_t num_elems = std::distance(first, last); + BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined."); + if (num_elems & 1) + { + auto middle = first + (num_elems - 1)/2; + std::nth_element(first, middle, last); + return *middle; + } + else + { + auto middle = first + num_elems/2 - 1; + std::nth_element(first, middle, last); + std::nth_element(middle, middle+1, last); + return (*middle + *(middle+1))/2; + } +} + + +template +inline auto median(RandomAccessContainer & v) +{ + return median(v.begin(), v.end()); +} + +template +auto gini_coefficient(RandomAccessIterator first, RandomAccessIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); + + std::sort(first, last); + + Real i = 1; + Real num = 0; + Real denom = 0; + for (auto it = first; it != last; ++it) + { + num += *it*i; + denom += *it; + ++i; + } + + // If the l1 norm is zero, all elements are zero, so every element is the same. + if (denom == 0) + { + return Real(0); + } + + return ((2*num)/denom - i)/(i-1); +} + +template +inline auto gini_coefficient(RandomAccessContainer & v) +{ + return gini_coefficient(v.begin(), v.end()); +} + +template +inline auto sample_gini_coefficient(RandomAccessIterator first, RandomAccessIterator last) +{ + size_t n = std::distance(first, last); + return n*gini_coefficient(first, last)/(n-1); +} + +template +inline auto sample_gini_coefficient(RandomAccessContainer & v) +{ + return sample_gini_coefficient(v.begin(), v.end()); +} + + }}} #endif diff --git a/test/bivariate_statistics_test.cpp b/test/bivariate_statistics_test.cpp index 5ba592d600..528df4cd25 100644 --- a/test/bivariate_statistics_test.cpp +++ b/test/bivariate_statistics_test.cpp @@ -32,8 +32,8 @@ using boost::multiprecision::cpp_complex_50; * 5) Does it work with complex data if complex data is sensible? 
*/ -using boost::math::tools::means_and_population_covariance; -using boost::math::tools::population_covariance; +using boost::math::tools::means_and_covariance; +using boost::math::tools::covariance; template void test_covariance() @@ -45,7 +45,7 @@ void test_covariance() // Covariance of a single thing is zero: std::array u1{8}; std::array v1{17}; - auto [mu_u1, mu_v1, cov1] = means_and_population_covariance(u1, v1); + auto [mu_u1, mu_v1, cov1] = means_and_covariance(u1, v1); BOOST_TEST(abs(cov1) < tol); BOOST_TEST(abs(mu_u1 - 8) < tol); @@ -54,7 +54,7 @@ void test_covariance() std::array u2{8, 4}; std::array v2{3, 7}; - auto [mu_u2, mu_v2, cov2] = means_and_population_covariance(u2, v2); + auto [mu_u2, mu_v2, cov2] = means_and_covariance(u2, v2); BOOST_TEST(abs(cov2+4) < tol); BOOST_TEST(abs(mu_u2 - 6) < tol); @@ -63,22 +63,22 @@ void test_covariance() std::vector u3{1,2,3}; std::vector v3{1,1,1}; - auto [mu_u3, mu_v3, cov3] = means_and_population_covariance(u3,v3); + auto [mu_u3, mu_v3, cov3] = means_and_covariance(u3, v3); // Since v is constant, covariance(u,v) = 0 against everything any u: BOOST_TEST(abs(cov3) < tol); BOOST_TEST(abs(mu_u3 - 2) < tol); BOOST_TEST(abs(mu_v3 - 1) < tol); - // Make sure we pull the correct symbol out of means_and_populaton_covariance: - cov3 = population_covariance(u3, v3); + // Make sure we pull the correct symbol out of means_and_covariance: + cov3 = covariance(u3, v3); BOOST_TEST(abs(cov3) < tol); - cov3 = population_covariance(v3, u3); + cov3 = covariance(v3, u3); // Covariance is symmetric: cov(u,v) = cov(v,u) BOOST_TEST(abs(cov3) < tol); // cov(u,u) = sigma(u)^2: - cov3 = population_covariance(u3, u3); + cov3 = covariance(u3, u3); Real expected = Real(2)/Real(3); BOOST_TEST(abs(cov3 - expected) < tol); @@ -88,24 +88,27 @@ void test_covariance() std::uniform_real_distribution dis(-1.0, 1.0); std::vector u(500); std::vector v(500); - for(size_t i = 0; i < u.size(); ++i) { + for(size_t i = 0; i < u.size(); ++i) + { u[i] = (Real) dis(gen); v[i] = (Real) dis(gen); } - auto [mu_u, sigma_u_sq] = boost::math::tools::mean_and_population_variance(u); - auto [mu_v, sigma_v_sq] = boost::math::tools::mean_and_population_variance(v); + Real mu_u = boost::math::tools::mean(u); + Real mu_v = boost::math::tools::mean(v); + Real sigma_u_sq = boost::math::tools::variance(u); + Real sigma_v_sq = boost::math::tools::variance(v); - auto [mu_u_, mu_v_, cov_uv] = means_and_population_covariance(u, v); + auto [mu_u_, mu_v_, cov_uv] = means_and_covariance(u, v); BOOST_TEST(abs(mu_u - mu_u_) < tol); BOOST_TEST(abs(mu_v - mu_v_) < tol); // Cauchy-Schwartz inequality: BOOST_TEST(cov_uv*cov_uv <= sigma_u_sq*sigma_v_sq); // cov(X, X) = sigma(X)^2: - Real cov_uu = population_covariance(u, u); + Real cov_uu = covariance(u, u); BOOST_TEST(abs(cov_uu - sigma_u_sq) < tol); - Real cov_vv = population_covariance(v, v); + Real cov_vv = covariance(v, v); BOOST_TEST(abs(cov_vv - sigma_v_sq) < tol); } diff --git a/test/univariate_statistics_test.cpp b/test/univariate_statistics_test.cpp index ded487c998..62cc74e863 100644 --- a/test/univariate_statistics_test.cpp +++ b/test/univariate_statistics_test.cpp @@ -101,38 +101,163 @@ void test_complex_mean() } template -void test_mean_and_population_variance() +void test_variance() { Real tol = std::numeric_limits::epsilon(); std::vector v{1,1,1,1,1,1}; - auto [mu, sigma_sq] = boost::math::tools::mean_and_population_variance(v.begin(), v.end()); - BOOST_TEST(abs(mu - 1) < tol); + Real sigma_sq = boost::math::tools::variance(v.begin(), v.end()); 
BOOST_TEST(abs(sigma_sq) < tol); + sigma_sq = boost::math::tools::variance(v); + BOOST_TEST(abs(sigma_sq) < tol); + + Real s_sq = boost::math::tools::sample_variance(v); + BOOST_TEST(abs(s_sq) < tol); + std::vector u{1}; - auto [mu1, sigma1_sq] = boost::math::tools::mean_and_population_variance(u.cbegin(), u.cend()); - BOOST_TEST(abs(mu1 - 1) < tol); - BOOST_TEST(abs(sigma1_sq) < tol); + sigma_sq = boost::math::tools::variance(u.cbegin(), u.cend()); + BOOST_TEST(abs(sigma_sq) < tol); std::array w{0,1,0,1,0,1,0,1}; - auto [mu2, sigma2_sq] = boost::math::tools::mean_and_population_variance(w.begin(), w.end()); - BOOST_TEST(abs(mu2 - 1.0/2.0) < tol); - BOOST_TEST(abs(sigma2_sq - 1.0/4.0) < tol); + sigma_sq = boost::math::tools::variance(w.begin(), w.end()); + BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol); - auto [mu3, sigma3_sq] = boost::math::tools::mean_and_population_variance(w); - BOOST_TEST(abs(mu3 - 1.0/2.0) < tol); - BOOST_TEST(abs(sigma3_sq - 1.0/4.0) < tol); + sigma_sq = boost::math::tools::variance(w); + BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol); + std::forward_list l{0,1,0,1,0,1,0,1}; + sigma_sq = boost::math::tools::variance(l.begin(), l.end()); + BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol); } template -void test_integer_mean_and_population_variance() +void test_integer_variance() { double tol = std::numeric_limits::epsilon(); std::vector v{1,1,1,1,1,1}; - auto [mu, sigma_sq] = boost::math::tools::mean_and_population_variance(v); - BOOST_TEST(abs(mu - 1) < tol); + double sigma_sq = boost::math::tools::variance(v); BOOST_TEST(abs(sigma_sq) < tol); + + std::forward_list l{0,1,0,1,0,1,0,1}; + sigma_sq = boost::math::tools::variance(l.begin(), l.end()); + BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol); +} + +template +void test_integer_skewness() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + double skew = boost::math::tools::skewness(v); + BOOST_TEST(abs(skew) < tol); + + // Dataset is symmetric about the mean: + v = {1,2,3,4,5}; + skew = boost::math::tools::skewness(v); + BOOST_TEST(abs(skew) < tol); + + v = {0,0,0,0,5}; + // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2 + skew = boost::math::tools::skewness(v); + BOOST_TEST(abs(skew - 3.0/2.0) < tol); + + std::forward_list v2{0,0,0,0,5}; + skew = boost::math::tools::skewness(v); + BOOST_TEST(abs(skew - 3.0/2.0) < tol); + +} + +template +void test_skewness() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + Real skew = boost::math::tools::skewness(v); + BOOST_TEST(abs(skew) < tol); + + // Dataset is symmetric about the mean: + v = {1,2,3,4,5}; + skew = boost::math::tools::skewness(v); + BOOST_TEST(abs(skew) < tol); + + v = {0,0,0,0,5}; + // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2 + skew = boost::math::tools::skewness(v); + BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol); + + std::array w1{0,0,0,0,5}; + skew = boost::math::tools::skewness(w1); + BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol); + + std::forward_list w2{0,0,0,0,5}; + skew = boost::math::tools::skewness(w2); + BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol); +} + +template +void test_kurtosis() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + Real kurt = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(kurt) < tol); + + v = {1,2,3,4,5}; + // mu =1, sigma^2 = 2, kurtosis = 17/10 + kurt = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(kurt - Real(17)/Real(10)) < tol); + + v = {0,0,0,0,5}; + // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2, kurtosis = 13/4 + kurt = boost::math::tools::kurtosis(v); + 
BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol); + + std::array v1{0,0,0,0,5}; + kurt = boost::math::tools::kurtosis(v1); + BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol); + + std::forward_list v2{0,0,0,0,5}; + kurt = boost::math::tools::kurtosis(v2); + BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol); + +} + +template +void test_integer_kurtosis() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + double kurt = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(kurt) < tol); + + v = {1,2,3,4,5}; + // mu =1, sigma^2 = 2, kurtosis = 17/10 + kurt = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(kurt - 17.0/10.0) < tol); + + v = {0,0,0,0,5}; + // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2, kurtosis = 13/4 + kurt = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(kurt - 13.0/4.0) < tol); +} + +template +void test_first_four_moments() +{ + Real tol = 10*std::numeric_limits::epsilon(); + std::vector v{1,1,1}; + auto [M1_1, M2_1, M3_1, M4_1] = boost::math::tools::first_four_moments(v); + BOOST_TEST(abs(M1_1 - 1) < tol); + BOOST_TEST(abs(M2_1) < tol); + BOOST_TEST(abs(M3_1) < tol); + BOOST_TEST(abs(M4_1) < tol); + + v = {1, 2, 3, 4, 5}; + auto [M1_2, M2_2, M3_2, M4_2] = boost::math::tools::first_four_moments(v); + BOOST_TEST(abs(M1_2 - 3) < tol); + BOOST_TEST(abs(M2_2 - 2) < tol); + BOOST_TEST(abs(M3_2) < tol); + BOOST_TEST(abs(M4_2 - Real(34)/Real(5)) < tol); } template @@ -183,145 +308,114 @@ void test_median() std::array w{1,2,3}; m = boost::math::tools::median(w); BOOST_TEST_EQ(m, 2); + + // Does it work with ublas? + boost::numeric::ublas::vector w1(3); + w1[0] = 1; + w1[1] = 2; + w1[2] = 3; + m = boost::math::tools::median(w); + BOOST_TEST_EQ(m, 2); } template -void test_gini_coefficient() +void test_sample_gini_coefficient() { Real tol = std::numeric_limits::epsilon(); std::vector v{1,0,0}; - Real gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + Real gini = boost::math::tools::sample_gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini - 1) < tol); - gini = boost::math::tools::gini_coefficient(v); + gini = boost::math::tools::sample_gini_coefficient(v); BOOST_TEST(abs(gini - 1) < tol); v[0] = 1; v[1] = 1; v[2] = 1; - gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + gini = boost::math::tools::sample_gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini) < tol); v[0] = 0; v[1] = 0; v[2] = 0; - gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + gini = boost::math::tools::sample_gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini) < tol); std::array w{0,0,0}; - gini = boost::math::tools::gini_coefficient(w); + gini = boost::math::tools::sample_gini_coefficient(w); BOOST_TEST(abs(gini) < tol); } -template -void test_integer_skewness() -{ - double tol = std::numeric_limits::epsilon(); - std::vector v{1,1,1}; - double skew = boost::math::tools::population_skewness(v); - BOOST_TEST(abs(skew) < tol); - - // Dataset is symmetric about the mean: - v = {1,2,3,4,5}; - skew = boost::math::tools::population_skewness(v); - BOOST_TEST(abs(skew) < tol); - - v = {0,0,0,0,5}; - // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2 - skew = boost::math::tools::population_skewness(v); - BOOST_TEST(abs(skew - 3.0/2.0) < tol); - -} template -void test_skewness() +void test_gini_coefficient() { Real tol = std::numeric_limits::epsilon(); - std::vector v{1,1,1}; - Real skew = boost::math::tools::population_skewness(v); - BOOST_TEST(abs(skew) < tol); - - // Dataset is symmetric about the mean: - v = {1,2,3,4,5}; - skew = 
boost::math::tools::population_skewness(v); - BOOST_TEST(abs(skew) < tol); - - v = {0,0,0,0,5}; - // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2 - skew = boost::math::tools::population_skewness(v); - BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol); + std::vector v{1,0,0}; + Real gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + Real expected = Real(2)/Real(3); + BOOST_TEST(abs(gini - expected) < tol); -} + gini = boost::math::tools::gini_coefficient(v); + BOOST_TEST(abs(gini - expected) < tol); -template -void test_kurtosis() -{ - Real tol = std::numeric_limits::epsilon(); - std::vector v{1,1,1}; - Real kurtosis = boost::math::tools::population_kurtosis(v); - BOOST_TEST(abs(kurtosis) < tol); + v[0] = 1; + v[1] = 1; + v[2] = 1; + gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); - v = {1,2,3,4,5}; - // mu =1, sigma^2 = 2, kurtosis = 17/10 - kurtosis = boost::math::tools::population_kurtosis(v); - BOOST_TEST(abs(kurtosis - Real(17)/Real(10)) < tol); + v[0] = 0; + v[1] = 0; + v[2] = 0; + gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); - v = {0,0,0,0,5}; - // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2, kurtosis = 13/4 - kurtosis = boost::math::tools::population_kurtosis(v); - BOOST_TEST(abs(kurtosis- Real(13)/Real(4)) < tol); -} + std::array w{0,0,0}; + gini = boost::math::tools::gini_coefficient(w); + BOOST_TEST(abs(gini) < tol); -template -void test_integer_kurtosis() -{ - double tol = std::numeric_limits::epsilon(); - std::vector v{1,1,1}; - double kurtosis = boost::math::tools::population_kurtosis(v); - BOOST_TEST(abs(kurtosis) < tol); + boost::numeric::ublas::vector w1(3); + w1[0] = 1; + w1[1] = 1; + w1[2] = 1; + gini = boost::math::tools::gini_coefficient(w1); + BOOST_TEST(abs(gini) < tol); - v = {1,2,3,4,5}; - // mu =1, sigma^2 = 2, kurtosis = 17/10 - kurtosis = boost::math::tools::population_kurtosis(v); - BOOST_TEST(abs(kurtosis - 17.0/10.0) < tol); + std::mt19937 gen(18); + // Gini coefficient for a uniform distribution is (b-a)/(3*(b+a)); + std::uniform_real_distribution dis(0, 3); + expected = (dis.b() - dis.a())/(3*(dis.b()+ dis.a())); + v.resize(1024); + for(size_t i = 0; i < v.size(); ++i) + { + v[i] = dis(gen); + } + gini = boost::math::tools::gini_coefficient(v); + BOOST_TEST(abs(gini - expected) < 0.01); - v = {0,0,0,0,5}; - // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2, kurtosis = 13/4 - kurtosis = boost::math::tools::population_kurtosis(v); - BOOST_TEST(abs(kurtosis- 13.0/4.0) < tol); } - int main() { - test_integer_mean(); - test_integer_mean(); - test_integer_mean(); - test_mean(); test_mean(); test_mean(); test_mean(); + test_integer_mean(); + test_integer_mean(); + test_integer_mean(); + test_complex_mean>(); test_complex_mean(); - test_mean_and_population_variance(); - test_mean_and_population_variance(); - test_mean_and_population_variance(); - test_mean_and_population_variance(); - - test_integer_mean_and_population_variance(); - - test_median(); - test_median(); - test_median(); - test_median(); + test_variance(); + test_variance(); + test_variance(); + test_variance(); - test_gini_coefficient(); - test_gini_coefficient(); - test_gini_coefficient(); - test_gini_coefficient(); + test_integer_variance(); test_skewness(); test_skewness(); @@ -335,7 +429,27 @@ int main() test_kurtosis(); test_kurtosis(); + test_first_four_moments(); + test_first_four_moments(); + test_first_four_moments(); + test_first_four_moments(); + test_integer_kurtosis(); + test_median(); + 
test_median(); + test_median(); + test_median(); + + test_gini_coefficient(); + test_gini_coefficient(); + test_gini_coefficient(); + test_gini_coefficient(); + + test_sample_gini_coefficient(); + test_sample_gini_coefficient(); + test_sample_gini_coefficient(); + test_sample_gini_coefficient(); + return boost::report_errors(); } From aa91164f0e2de9b1025f4594845089d641659026 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Mon, 17 Dec 2018 15:11:15 -0700 Subject: [PATCH 23/46] Documentation cleanup [CI SKIP] --- doc/vector_functionals/signal_statistics.qbk | 23 ++++++++++-------- .../univariate_statistics.qbk | 2 +- .../boost/math/tools/signal_statistics.hpp | 24 ++++++++++++++----- test/signal_statistics_test.cpp | 18 +++++++------- 4 files changed, 42 insertions(+), 25 deletions(-) diff --git a/doc/vector_functionals/signal_statistics.qbk b/doc/vector_functionals/signal_statistics.qbk index 4fba934ecd..b66acccefd 100644 --- a/doc/vector_functionals/signal_statistics.qbk +++ b/doc/vector_functionals/signal_statistics.qbk @@ -27,6 +27,12 @@ namespace boost{ namespace math{ namespace tools { template auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last); + template + auto sample_absolute_gini_coefficient(Container & c); + + template + auto sample_absolute_gini_coefficient(ForwardIterator first, ForwardIterator last); + template auto hoyer_sparsity(Container const & c); @@ -73,12 +79,13 @@ The absolute median supports both real and complex arithmetic, modifies its inpu The Gini coefficient, first used to measure wealth inequality, is also one of the best measures of the sparsity of an expansion in a basis. A sparse expansion has most of its norm concentrated in just a few coefficients, making the connection with wealth inequality obvious. -ee [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard] for details. +See [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard] for details. However, for measuring sparsity, the phase of the numbers is irrelevant, so we provide the `absolute_gini_coefficient`: + using boost::math::tools::sample_absolute_gini_coefficient; using boost::math::tools::absolute_gini_coefficient; std::vector> v{{0,1}, {0,0}, {0,0}, {0,0}}; - double abs_gini = absolute_gini_coefficient(v); + double abs_gini = sample_absolute_gini_coefficient(v); // now abs_gini = 1; maximally unequal std::vector> w{{0,1}, {1,0}, {0,-1}, {-1,0}}; @@ -91,15 +98,12 @@ However, for measuring sparsity, the phase of the numbers is irrelevant, so we p // Alternative call useful for computing over subset of the input: abs_gini = absolute_gini_coefficient(u.begin(), u.begin() + 1); - // If you need the population Gini coefficient: - double population_gini = (u.size() -1)*absolute_gini_coefficient(u)/u.size(); -Wikipedia calls our scaling a "sample Gini coefficient". -We chose this scaling because it always returns unity for a vector which has only one nonzero coefficient, -whereas the value of the population Gini coefficient of a vector with one non-zero element is dependent on the length of the input. +The sample Gini coefficient returns unity for a vector which has only one nonzero coefficient. +The population Gini coefficient of a vector with one non-zero element is dependent on the length of the input. -Our scaling lacks one desirable property of the population Gini coefficient, namely that "cloning" a vector has the same Gini coefficient. -If you wish to recover the cloning property, convert to the population Gini coefficient. 
+The sample Gini coefficient lacks one desirable property of the population Gini coefficient, +namely that "cloning" a vector has the same Gini coefficient; though cloning holds to very high accuracy with the sample Gini coefficient and can easily be recovered by a rescaling. If sorting the input data is too much expense for a sparsity measure (is it going to be perfect anyway?), consider calculating the Hoyer sparsity instead. @@ -205,7 +209,6 @@ Then the method has no mechanism for distinguishing the signal from the noise, a [heading References] -* Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002. * Mallat, Stephane. ['A wavelet tour of signal processing: the sparse way.] Academic press, 2008. * Hurley, Niall, and Scott Rickard. ['Comparing measures of sparsity.] IEEE Transactions on Information Theory 55.10 (2009): 4723-4741. * Jensen, Arne, and Anders la Cour-Harbo. ['Ripples in mathematics: the discrete wavelet transform.] Springer Science & Business Media, 2001. diff --git a/doc/vector_functionals/univariate_statistics.qbk b/doc/vector_functionals/univariate_statistics.qbk index 7e345fb2c8..6f9b542b7e 100644 --- a/doc/vector_functionals/univariate_statistics.qbk +++ b/doc/vector_functionals/univariate_statistics.qbk @@ -186,7 +186,7 @@ Compute the Gini coefficient of a dataset: gini = boost::math::tools::gini_coefficient(w.begin(), w.end()); // gini = 0, as all elements are now equal. -/Nota bene: The input data is altered-in particular, it is sorted. Makes a call to `std::sort`, and as such requires random access iterators./ +/Nota bene/: The input data is altered-in particular, it is sorted. Makes a call to `std::sort`, and as such requires random access iterators. The sample Gini coefficient lies in the range [0,1], whereas the population Gini coefficient is in the range [0, 1 - 1/ /n/]. 
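To make the two scalings concrete, here is a small sketch (assuming only the header `boost/math/tools/univariate_statistics.hpp` shown above) demonstrating that the sample Gini coefficient is the population value rescaled by n/(n-1):

    #include <iostream>
    #include <vector>
    #include <boost/math/tools/univariate_statistics.hpp>

    int main()
    {
        std::vector<double> v{1, 0, 0, 0};
        double n = v.size();

        // Nota bene: both calls sort v in place.
        double pop_gini = boost::math::tools::gini_coefficient(v);          // 3/4, i.e. 1 - 1/n
        double samp_gini = boost::math::tools::sample_gini_coefficient(v);  // 1

        std::cout << "population Gini: " << pop_gini
                  << ", sample Gini: " << samp_gini
                  << ", n/(n-1) * population Gini: " << n*pop_gini/(n - 1) << "\n";
    }
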
diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index 8eae5fb35c..e474a6617c 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -72,17 +72,29 @@ auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) decltype(abs(*first)) zero = 0; return zero; } - return ((2*num)/denom - i)/(i-2); + return ((2*num)/denom - i)/(i-1); +} +template +inline auto absolute_gini_coefficient(RandomAccessContainer & v) +{ + return boost::math::tools::absolute_gini_coefficient(v.begin(), v.end()); +} +template +auto sample_absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) +{ + size_t n = std::distance(first, last); + return n*boost::math::tools::absolute_gini_coefficient(first, last)/(n-1); } template -inline auto absolute_gini_coefficient(RandomAccessContainer & v) +inline auto sample_absolute_gini_coefficient(RandomAccessContainer & v) { - return absolute_gini_coefficient(v.begin(), v.end()); + return boost::math::tools::sample_absolute_gini_coefficient(v.begin(), v.end()); } + // The Hoyer sparsity measure is defined in: // https://arxiv.org/pdf/0811.4706.pdf template @@ -118,7 +130,7 @@ auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) template inline auto hoyer_sparsity(Container const & v) { - return hoyer_sparsity(v.cbegin(), v.cend()); + return boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); } @@ -224,7 +236,7 @@ template auto mean_invariant_oracle_snr_db(Container const & signal, Container const & noisy_signal) { using std::log10; - return 10*log10(mean_invariant_oracle_snr(signal, noisy_signal)); + return 10*log10(boost::math::tools::mean_invariant_oracle_snr(signal, noisy_signal)); } @@ -233,7 +245,7 @@ template auto oracle_snr_db(Container const & signal, Container const & noisy_signal) { using std::log10; - return 10*log10(oracle_snr(signal, noisy_signal)); + return 10*log10(boost::math::tools::oracle_snr(signal, noisy_signal)); } // A good reference on the M2M4 estimator: diff --git a/test/signal_statistics_test.cpp b/test/signal_statistics_test.cpp index 7fe3fa04d3..bf247f2cf2 100644 --- a/test/signal_statistics_test.cpp +++ b/test/signal_statistics_test.cpp @@ -227,19 +227,22 @@ template void test_absolute_gini_coefficient() { using boost::math::tools::absolute_gini_coefficient; + using boost::math::tools::sample_absolute_gini_coefficient; Real tol = std::numeric_limits::epsilon(); std::vector v{-1,0,0}; - Real gini = absolute_gini_coefficient(v.begin(), v.end()); + Real gini = sample_absolute_gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini - 1) < tol); gini = absolute_gini_coefficient(v); - BOOST_TEST(abs(gini - 1) < tol); + BOOST_TEST(abs(gini - Real(2)/Real(3)) < tol); v[0] = 1; v[1] = -1; v[2] = 1; gini = absolute_gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini) < tol); + gini = sample_absolute_gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); std::vector> w(128); std::complex i{0,1}; @@ -249,6 +252,8 @@ void test_absolute_gini_coefficient() } gini = absolute_gini_coefficient(w.begin(), w.end()); BOOST_TEST(abs(gini) < tol); + gini = sample_absolute_gini_coefficient(w.begin(), w.end()); + BOOST_TEST(abs(gini) < tol); // The population Gini index is invariant under "cloning": If w = v \oplus v, then G(w) = G(v). 
// We use the sample Gini index, so we need to rescale @@ -263,11 +268,8 @@ void test_absolute_gini_coefficient() { u[i + u.size()/2] = u[i]; } - std::cout << std::setprecision(std::numeric_limits::digits10 + 1); - Real scale1 = (u.size() - 2)/static_cast(u.size()); - Real scale2 = (u.size() - 1)/static_cast(u.size()); - Real population_gini1 = scale1*absolute_gini_coefficient(u.begin(), u.begin() + u.size()/2); - Real population_gini2 = scale2*absolute_gini_coefficient(u.begin(), u.end()); + Real population_gini1 = absolute_gini_coefficient(u.begin(), u.begin() + u.size()/2); + Real population_gini2 = absolute_gini_coefficient(u.begin(), u.end()); BOOST_TEST(abs(population_gini1 - population_gini2) < 10*tol); @@ -281,7 +283,7 @@ void test_absolute_gini_coefficient() { u[i] = exp_dis(gen); } - population_gini2 = scale2*absolute_gini_coefficient(u); + population_gini2 = absolute_gini_coefficient(u); BOOST_TEST(abs(population_gini2 - 0.5) < 0.01); } From 01ba0fea91689acb0e69322f686ae2622568c960 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Mon, 17 Dec 2018 15:20:46 -0700 Subject: [PATCH 24/46] Meaningless commit to kick off build. --- doc/vector_functionals/norms.qbk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/vector_functionals/norms.qbk b/doc/vector_functionals/norms.qbk index 553ea56763..e8e9183c16 100644 --- a/doc/vector_functionals/norms.qbk +++ b/doc/vector_functionals/norms.qbk @@ -1,5 +1,5 @@ [/ - Copyright 2017 Nick Thompson + Copyright 2018 Nick Thompson Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at From f51e55c3cbe1a7d85ff393e76bcbd6874dd91e6f Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Tue, 18 Dec 2018 11:46:46 -0700 Subject: [PATCH 25/46] Take advice of cppcheck [CI SKIP] --- include/boost/math/tools/signal_statistics.hpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index e474a6617c..ae0ad1fd3f 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -210,12 +210,12 @@ auto mean_invariant_oracle_snr(Container const & signal, Container const & noisy using Real = typename Container::value_type; BOOST_ASSERT_MSG(signal.size() == noisy_signal.size(), "Signal and noisy signal must be have the same number of elements."); - Real mean = boost::math::tools::mean(signal); + Real mu = boost::math::tools::mean(signal); Real numerator = 0; Real denominator = 0; for (size_t i = 0; i < signal.size(); ++i) { - Real tmp = signal[i] - mean; + Real tmp = signal[i] - mu; numerator += tmp*tmp; denominator += (signal[i] - noisy_signal[i])*(signal[i] - noisy_signal[i]); } @@ -318,7 +318,12 @@ auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::val } return S/N; } - + S = M2 - N; + if (S < 0) + { + return std::numeric_limits::quiet_NaN(); + } + return S/N; } S = sqrt(Ssq); N = M2 - S; From 4848accb7737f6295328bc9fe040f479f410727e Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Tue, 25 Dec 2018 23:00:46 -0700 Subject: [PATCH 26/46] Add unit tests demonstrating that the kurtosis of various continuous distributions is calculated correctly. Add excess_kurtosis to make clear the working definition of kurtosis. 
[CI SKIP] --- .../univariate_statistics.qbk | 9 +++++- .../math/tools/univariate_statistics.hpp | 12 +++++++ test/univariate_statistics_test.cpp | 32 ++++++++++++++++++- 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/doc/vector_functionals/univariate_statistics.qbk b/doc/vector_functionals/univariate_statistics.qbk index 6f9b542b7e..6d448ad52c 100644 --- a/doc/vector_functionals/univariate_statistics.qbk +++ b/doc/vector_functionals/univariate_statistics.qbk @@ -45,6 +45,12 @@ namespace boost{ namespace math{ namespace tools { template auto kurtosis(ForwardIterator first, ForwardIterator last); + template + auto excess_kurtosis(Container const & c); + + template + auto excess_kurtosis(ForwardIterator first, ForwardIterator last); + template auto first_four_moments(Container const & c); @@ -150,7 +156,8 @@ The implementation follows [@https://prod.sandia.gov/techlib-noauth/access-contr The input data must be forward iterable and must consist of real or integral values. If the input data is integral, the output is a double precision float. Note that this is /not/ the excess kurtosis. -If you require the excess kurtosis, subtract 3 from the kurtosis. +If you require the excess kurtosis, use `boost::math::tools::excess_kurtosis`. +This function simply subtracts 3 from the kurtosis, but it makes eminently clear our definition of kurtosis. [heading First four moments] diff --git a/include/boost/math/tools/univariate_statistics.hpp b/include/boost/math/tools/univariate_statistics.hpp index ca3411c431..0c5b2dca89 100644 --- a/include/boost/math/tools/univariate_statistics.hpp +++ b/include/boost/math/tools/univariate_statistics.hpp @@ -241,6 +241,18 @@ inline auto kurtosis(Container const & v) return kurtosis(v.cbegin(), v.cend()); } +template +auto excess_kurtosis(ForwardIterator first, ForwardIterator last) +{ + return kurtosis(first, last) - 3; +} + +template +inline auto excess_kurtosis(Container const & v) +{ + return excess_kurtosis(v.cbegin(), v.cend()); +} + // Follows equation 1.5/1.6 of: // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf diff --git a/test/univariate_statistics_test.cpp b/test/univariate_statistics_test.cpp index 62cc74e863..a4baa0e9b2 100644 --- a/test/univariate_statistics_test.cpp +++ b/test/univariate_statistics_test.cpp @@ -220,6 +220,35 @@ void test_kurtosis() kurt = boost::math::tools::kurtosis(v2); BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol); + std::vector v3(10000); + std::mt19937 gen(42); + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v3.size(); ++i) { + v3[i] = dis(gen); + } + kurt = boost::math::tools::kurtosis(v3); + BOOST_TEST(abs(kurt - 3) < 0.1); + + std::uniform_real_distribution udis(-1, 3); + for (size_t i = 0; i < v3.size(); ++i) { + v3[i] = udis(gen); + } + auto excess_kurtosis = boost::math::tools::excess_kurtosis(v3); + BOOST_TEST(abs(excess_kurtosis + 6.0/5.0) < 0.2); + + + // This test only passes when there are a large number of samples. + // Otherwise, the distribution doesn't generate enough outliers to give, + // or generates too many, giving pretty wildly different values of kurtosis on different runs. + // However, by kicking up the samples to 1,000,000, I got very close to 6 for the excess kurtosis on every run. + // The CI system, however, would die on a million long doubles. 
+ //std::exponential_distribution edis(0.1); + //for (size_t i = 0; i < v3.size(); ++i) { + // v3[i] = edis(gen); + //} + //excess_kurtosis = boost::math::tools::kurtosis(v3) - 3; + //BOOST_TEST(abs(excess_kurtosis - 6.0) < 0.2); + } template @@ -427,7 +456,8 @@ int main() test_kurtosis(); test_kurtosis(); test_kurtosis(); - test_kurtosis(); + // Kinda expensive: + //test_kurtosis(); test_first_four_moments(); test_first_four_moments(); From eab2b5260ed58c625a969ea3768151c6781951fa Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Wed, 26 Dec 2018 18:30:02 -0700 Subject: [PATCH 27/46] Solve quadratic equations --- doc/roots/roots.qbk | 18 ++++ include/boost/math/tools/roots.hpp | 168 +++++++++++++++++++++++++++++ test/test_roots.cpp | 96 +++++++++++++++++ 3 files changed, 282 insertions(+) diff --git a/doc/roots/roots.qbk b/doc/roots/roots.qbk index e5df37b05a..95a3ec905e 100644 --- a/doc/roots/roots.qbk +++ b/doc/roots/roots.qbk @@ -32,6 +32,9 @@ template Complex complex_newton(F f, Complex guess, int max_iterations = std::numeric_limits::digits); + template + auto solve_quadratic(T const & a, T const & b, T const & c); + }}} // namespaces boost::math::tools. [h4 Description] @@ -45,6 +48,8 @@ __halley and __schroder iteration. * `complex_newton` performs Newton's method on complex-analytic functions. +* `solve_quadratic` solves quadratic equations using various tricks to keep catastrophic cancellation from occurring in computation of the discriminant. + [variablelist Parameters of the real-valued root finding functions [[F f] [Type F must be a callable function object (or C++ lambda) that accepts one parameter and @@ -173,6 +178,19 @@ Finally, the derivative of /f/ must be continuous at the root or else non-roots An example usage of `complex_newton` is given in `examples/daubechies_coefficients.cpp`. +[h4 Quadratics] + +To solve a quadratic /ax[super 2] + bx + c = 0/, we may use + + auto [x0, x1] = boost::math::tools::solve_quadratic(a, b, c); + +If the roots are complex and the inputs are real, /x0/ and /x1/ are /nans/. +In this case we must cast /a/, /b/ and /c/ to a complex to extract the complex roots. +If /a/, /b/ and /c/ are integral, then the roots are of type double. +The routine is much faster if the fused-multiply-add instruction is available on your architecture; +if it is not available, the routine resorts to emulation. + + [h4 Examples] See __root_finding_examples. diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index 62a24c2a5f..02d104044a 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -9,6 +9,8 @@ #ifdef _MSC_VER #pragma once #endif +#include // test for multiprecision types. 
+ #include #include #include @@ -649,6 +651,172 @@ Complex complex_newton(F g, Complex guess, int max_iterations=std::numeric_limit } #endif + +#if !defined(BOOST_NO_CXX17_IF_CONSTEXPR) +// https://stackoverflow.com/questions/48979861/numerically-stable-method-for-solving-quadratic-equations/50065711 +namespace detail +{ + template + T diff_of_products (T a, T b, T c, T d) + { + T w = d*c; + T e = std::fma(-d, c, w); + T f = std::fma(a, b, -w); + return f + e; + } +} + +template +auto solve_quadratic(T const& a, T const& b, T const& c) +{ + using std::copysign; + using std::sqrt; + if constexpr (std::is_integral::value) + { + double nan = std::numeric_limits::quiet_NaN(); + if(a==0) + { + if (b==0 && c != 0) + { + return std::pair(nan, nan); + } + else if (b==0 && c==0) + { + return std::pair(0,0); + } + return std::pair(-c/b, -c/b); + } + if (b==0) + { + double x0_sq = -double(c)/double(a); + if (x0_sq < 0) { + return std::pair(nan, nan); + } + double x0 = sqrt(x0_sq); + return std::pair(-x0,x0); + } + double discriminant = detail::diff_of_products(double(b), double(b), 4.0*double(a), double(c)); + if (discriminant < 0) + { + return std::pair(nan, nan); + } + double q = -(b + copysign(sqrt(discriminant), double(b)))/T(2); + double x0 = q/a; + double x1 = c/q; + if (x0 < x1) { + return std::pair(x0, x1); + } + return std::pair(x1, x0); + } + else if constexpr (std::is_floating_point::value) + { + T nan = std::numeric_limits::quiet_NaN(); + if(a==0) + { + if (b==0 && c != 0) + { + return std::pair(nan, nan); + } + else if (b==0 && c==0) + { + return std::pair(0,0); + } + return std::pair(-c/b, -c/b); + } + if (b==0) + { + T x0_sq = -c/a; + if (x0_sq < 0) { + return std::pair(nan, nan); + } + T x0 = sqrt(x0_sq); + return std::pair(-x0,x0); + } + T discriminant = detail::diff_of_products(b, b, 4*a, c); + // Is there a sane way to flush very small negative values to zero? + // If there is I don't know of it. + if (discriminant < 0) + { + std::cout << "Discriminant = " << discriminant << "\n"; + return std::pair(nan, nan); + } + T q = -(b + copysign(sqrt(discriminant), b))/T(2); + T x0 = q/a; + T x1 = c/q; + if (x0 < x1) + { + return std::pair(x0, x1); + } + return std::pair(x1, x0); + } + else if constexpr (boost::is_complex::value || boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + typename T::value_type nan = std::numeric_limits::quiet_NaN(); + if(a.real()==0 && a.imag() ==0) + { + using std::norm; + if (b.real()==0 && b.imag() && norm(c) != 0) + { + return std::pair({nan, nan}, {nan, nan}); + } + else if (b.real()==0 && b.imag() && c.real() ==0 && c.imag() == 0) + { + return std::pair({0,0},{0,0}); + } + return std::pair(-c/b, -c/b); + } + if (b.real()==0 && b.imag() == 0) + { + T x0_sq = -c/a; + T x0 = sqrt(x0_sq); + return std::pair(-x0, x0); + } + // There's no fma for complex types: + T discriminant = b*b - T(4)*a*c; + T q = -(b + sqrt(discriminant))/T(2); + return std::pair(q/a, c/q); + } + else // Most likely the type is a boost.multiprecision. + { //There is no fma for multiprecision, and in addition it doesn't seem to be useful, so revert to the naive computation. 
+ T nan = std::numeric_limits::quiet_NaN(); + if(a==0) + { + if (b==0 && c != 0) + { + return std::pair(nan, nan); + } + else if (b==0 && c==0) + { + return std::pair(0,0); + } + return std::pair(-c/b, -c/b); + } + if (b==0) + { + T x0_sq = -c/a; + if (x0_sq < 0) { + return std::pair(nan, nan); + } + T x0 = sqrt(x0_sq); + return std::pair(-x0,x0); + } + T discriminant = b*b - 4*a*c; + if (discriminant < 0) + { + return std::pair(nan, nan); + } + T q = -(b + copysign(sqrt(discriminant), b))/T(2); + T x0 = q/a; + T x1 = c/q; + if (x0 < x1) + { + return std::pair(x0, x1); + } + return std::pair(x1, x0); + } +} +#endif + } // namespace tools } // namespace math } // namespace boost diff --git a/test/test_roots.cpp b/test/test_roots.cpp index 3073397302..93b01ad965 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #define BOOST_CHECK_CLOSE_EX(a, b, prec, i) \ @@ -420,6 +421,91 @@ void test_daubechies_fails() } #endif +#if !defined(BOOST_NO_CXX17_IF_CONSTEXPR) +template +void test_solve_real_quadratic() +{ + Real tol = std::numeric_limits::epsilon(); + using boost::math::tools::solve_quadratic; + auto [x0, x1] = solve_quadratic(1, 0, -1); + BOOST_CHECK_CLOSE(x0, Real(-1), tol); + BOOST_CHECK_CLOSE(x1, Real(1), tol); + + auto p = solve_quadratic(7, 0, 0); + BOOST_CHECK_SMALL(p.first, tol); + BOOST_CHECK_SMALL(p.second, tol); + + // (x-7)^2 = x^2 - 14*x + 49: + p = solve_quadratic(1, -14, 49); + BOOST_CHECK_CLOSE(p.first, Real(7), tol); + BOOST_CHECK_CLOSE(p.second, Real(7), tol); + + // This test does not pass in multiprecision, + // due to the fact it does not have an fma: + if (std::is_floating_point::value) + { + // (x-1)(x-1-eps) = x^2 + (-eps - 2)x + (1)(1+eps) + Real eps = 2*std::numeric_limits::epsilon(); + p = solve_quadratic(256, 256*(-2 - eps), 256*(1 + eps)); + BOOST_CHECK_CLOSE(p.first, Real(1), tol); + BOOST_CHECK_CLOSE(p.second, Real(1) + eps, tol); + } + + if (std::is_same::value) + { + // Kahan's example: This is the test that demonstrates the necessity of the fma instruction. 
+ // https://en.wikipedia.org/wiki/Loss_of_significance#Instability_of_the_quadratic_equation + p = solve_quadratic(94906265.625, -189812534, 94906268.375); + BOOST_CHECK_CLOSE_FRACTION(p.first, Real(1), tol); + BOOST_CHECK_CLOSE_FRACTION(p.second, 1.000000028975958, 4*tol); + } +} + +template +void test_solve_int_quadratic() +{ + double tol = std::numeric_limits::epsilon(); + using boost::math::tools::solve_quadratic; + auto [x0, x1] = solve_quadratic(1, 0, -1); + BOOST_CHECK_CLOSE(x0, double(-1), tol); + BOOST_CHECK_CLOSE(x1, double(1), tol); + + auto p = solve_quadratic(7, 0, 0); + BOOST_CHECK_SMALL(p.first, tol); + BOOST_CHECK_SMALL(p.second, tol); + + // (x-7)^2 = x^2 - 14*x + 49: + p = solve_quadratic(1, -14, 49); + BOOST_CHECK_CLOSE(p.first, double(7), tol); + BOOST_CHECK_CLOSE(p.second, double(7), tol); +} + +template +void test_solve_complex_quadratic() +{ + using Real = typename Complex::value_type; + Real tol = std::numeric_limits::epsilon(); + using boost::math::tools::solve_quadratic; + auto [x0, x1] = solve_quadratic({1,0}, {0,0}, {-1,0}); + BOOST_CHECK_CLOSE(x0.real(), Real(-1), tol); + BOOST_CHECK_CLOSE(x1.real(), Real(1), tol); + BOOST_CHECK_SMALL(x0.imag(), tol); + BOOST_CHECK_SMALL(x1.imag(), tol); + + auto p = solve_quadratic({7,0}, {0,0}, {0,0}); + BOOST_CHECK_SMALL(p.first.real(), tol); + BOOST_CHECK_SMALL(p.second.real(), tol); + + // (x-7)^2 = x^2 - 14*x + 49: + p = solve_quadratic({1,0}, {-14,0}, {49,0}); + BOOST_CHECK_CLOSE(p.first.real(), Real(7), tol); + BOOST_CHECK_CLOSE(p.second.real(), Real(7), tol); + +} + + +#endif + BOOST_AUTO_TEST_CASE( test_main ) { @@ -432,4 +518,14 @@ BOOST_AUTO_TEST_CASE( test_main ) test_daubechies_fails(); #endif +#if !defined(BOOST_NO_CXX17_IF_CONSTEXPR) + test_solve_real_quadratic(); + test_solve_real_quadratic(); + test_solve_real_quadratic(); + test_solve_real_quadratic(); + + test_solve_int_quadratic(); + test_solve_complex_quadratic>(); +#endif + } From 1de89f997ad6dcb5d7f328c20b69d7f99e33c928 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Thu, 27 Dec 2018 11:08:29 -0700 Subject: [PATCH 28/46] Clarify documentation [CI SKIP] --- doc/roots/roots.qbk | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/doc/roots/roots.qbk b/doc/roots/roots.qbk index 95a3ec905e..ef64aafdc6 100644 --- a/doc/roots/roots.qbk +++ b/doc/roots/roots.qbk @@ -180,15 +180,21 @@ An example usage of `complex_newton` is given in `examples/daubechies_coefficien [h4 Quadratics] -To solve a quadratic /ax[super 2] + bx + c = 0/, we may use +To solve a quadratic /ax/[super 2] + /bx/ + /c/ = 0, we may use auto [x0, x1] = boost::math::tools::solve_quadratic(a, b, c); -If the roots are complex and the inputs are real, /x0/ and /x1/ are /nans/. -In this case we must cast /a/, /b/ and /c/ to a complex to extract the complex roots. -If /a/, /b/ and /c/ are integral, then the roots are of type double. -The routine is much faster if the fused-multiply-add instruction is available on your architecture; -if it is not available, the routine resorts to emulation. +If the roots are real, they are arranged so that `x0` \u2264 `x1`. +If the roots are complex and the inputs are real, `x0` and `x1` are both `std::numeric_limits::quiet_NaN()`. +In this case we must cast `a`, `b` and `c` to a complex type to extract the complex roots. +If `a`, `b` and `c` are integral, then the roots are of type double. +The routine is much faster if the fused-multiply-add instruction is available on your architecture. 
+If the fma is not available, the function resorts to slow emulation. +Finally, speed is improved if you compile for your particular architecture. +For instance, if you compile without any architecture flags, then the `std::fma` call compiles down to `call _fma`, +which dynamically chooses to emulate or execute the `vfmadd132sd` instruction based on the capabilities of the architecture. +If instead, you compile with (say) `-march=native` then no dynamic choice is made: +The `vfmadd132sd` instruction is always executed if available and emulation is used if not. [h4 Examples] From bee2889e85ae0ab35996d50c38132f1dd413ed91 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Thu, 27 Dec 2018 12:44:46 -0700 Subject: [PATCH 29/46] Change solve_quadratic to quadratic_roots. [CI SKIP] --- doc/roots/roots.qbk | 4 ++-- include/boost/math/tools/roots.hpp | 12 +++++++----- test/test_roots.cpp | 28 ++++++++++++++-------------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/doc/roots/roots.qbk b/doc/roots/roots.qbk index ef64aafdc6..8880ba2cba 100644 --- a/doc/roots/roots.qbk +++ b/doc/roots/roots.qbk @@ -33,7 +33,7 @@ Complex complex_newton(F f, Complex guess, int max_iterations = std::numeric_limits::digits); template - auto solve_quadratic(T const & a, T const & b, T const & c); + auto quadratic_roots(T const & a, T const & b, T const & c); }}} // namespaces boost::math::tools. @@ -182,7 +182,7 @@ An example usage of `complex_newton` is given in `examples/daubechies_coefficien To solve a quadratic /ax/[super 2] + /bx/ + /c/ = 0, we may use - auto [x0, x1] = boost::math::tools::solve_quadratic(a, b, c); + auto [x0, x1] = boost::math::tools::quadratic_roots(a, b, c); If the roots are real, they are arranged so that `x0` \u2264 `x1`. If the roots are complex and the inputs are real, `x0` and `x1` are both `std::numeric_limits::quiet_NaN()`. diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index 02d104044a..decdf19a32 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -657,7 +657,7 @@ Complex complex_newton(F g, Complex guess, int max_iterations=std::numeric_limit namespace detail { template - T diff_of_products (T a, T b, T c, T d) + inline T difference_of_products(T const & a, T const & b, T const & c, T const & d) { T w = d*c; T e = std::fma(-d, c, w); @@ -667,12 +667,15 @@ namespace detail } template -auto solve_quadratic(T const& a, T const& b, T const& c) +auto quadratic_roots(T const& a, T const& b, T const& c) { using std::copysign; using std::sqrt; if constexpr (std::is_integral::value) { + // What I want is to write: + // return quadratic_roots(double(a), double(b), double(c)); + // but that doesn't compile. double nan = std::numeric_limits::quiet_NaN(); if(a==0) { @@ -695,7 +698,7 @@ auto solve_quadratic(T const& a, T const& b, T const& c) double x0 = sqrt(x0_sq); return std::pair(-x0,x0); } - double discriminant = detail::diff_of_products(double(b), double(b), 4.0*double(a), double(c)); + double discriminant = detail::difference_of_products(double(b), double(b), 4.0*double(a), double(c)); if (discriminant < 0) { return std::pair(nan, nan); @@ -732,12 +735,11 @@ auto solve_quadratic(T const& a, T const& b, T const& c) T x0 = sqrt(x0_sq); return std::pair(-x0,x0); } - T discriminant = detail::diff_of_products(b, b, 4*a, c); + T discriminant = detail::difference_of_products(b, b, 4*a, c); // Is there a sane way to flush very small negative values to zero? // If there is I don't know of it. 
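// How difference_of_products(b, b, 4*a, c) computes b*b - 4*a*c accurately:
// w = (4*a)*c is rounded once; e = fma(-c, 4*a, w) is then exactly w - 4*a*c, the rounding
// error of w, and f = fma(b, b, -w) is b*b - w with a single rounding.
// The sum f + e is therefore an accurate value of b*b - 4*a*c even when the two products
// nearly cancel, which is what keeps the computed roots accurate in that regime.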
if (discriminant < 0) { - std::cout << "Discriminant = " << discriminant << "\n"; return std::pair(nan, nan); } T q = -(b + copysign(sqrt(discriminant), b))/T(2); diff --git a/test/test_roots.cpp b/test/test_roots.cpp index 93b01ad965..b26b91f634 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -426,17 +426,17 @@ template void test_solve_real_quadratic() { Real tol = std::numeric_limits::epsilon(); - using boost::math::tools::solve_quadratic; - auto [x0, x1] = solve_quadratic(1, 0, -1); + using boost::math::tools::quadratic_roots; + auto [x0, x1] = quadratic_roots(1, 0, -1); BOOST_CHECK_CLOSE(x0, Real(-1), tol); BOOST_CHECK_CLOSE(x1, Real(1), tol); - auto p = solve_quadratic(7, 0, 0); + auto p = quadratic_roots(7, 0, 0); BOOST_CHECK_SMALL(p.first, tol); BOOST_CHECK_SMALL(p.second, tol); // (x-7)^2 = x^2 - 14*x + 49: - p = solve_quadratic(1, -14, 49); + p = quadratic_roots(1, -14, 49); BOOST_CHECK_CLOSE(p.first, Real(7), tol); BOOST_CHECK_CLOSE(p.second, Real(7), tol); @@ -446,7 +446,7 @@ void test_solve_real_quadratic() { // (x-1)(x-1-eps) = x^2 + (-eps - 2)x + (1)(1+eps) Real eps = 2*std::numeric_limits::epsilon(); - p = solve_quadratic(256, 256*(-2 - eps), 256*(1 + eps)); + p = quadratic_roots(256, 256*(-2 - eps), 256*(1 + eps)); BOOST_CHECK_CLOSE(p.first, Real(1), tol); BOOST_CHECK_CLOSE(p.second, Real(1) + eps, tol); } @@ -455,7 +455,7 @@ void test_solve_real_quadratic() { // Kahan's example: This is the test that demonstrates the necessity of the fma instruction. // https://en.wikipedia.org/wiki/Loss_of_significance#Instability_of_the_quadratic_equation - p = solve_quadratic(94906265.625, -189812534, 94906268.375); + p = quadratic_roots(94906265.625, -189812534, 94906268.375); BOOST_CHECK_CLOSE_FRACTION(p.first, Real(1), tol); BOOST_CHECK_CLOSE_FRACTION(p.second, 1.000000028975958, 4*tol); } @@ -465,17 +465,17 @@ template void test_solve_int_quadratic() { double tol = std::numeric_limits::epsilon(); - using boost::math::tools::solve_quadratic; - auto [x0, x1] = solve_quadratic(1, 0, -1); + using boost::math::tools::quadratic_roots; + auto [x0, x1] = quadratic_roots(1, 0, -1); BOOST_CHECK_CLOSE(x0, double(-1), tol); BOOST_CHECK_CLOSE(x1, double(1), tol); - auto p = solve_quadratic(7, 0, 0); + auto p = quadratic_roots(7, 0, 0); BOOST_CHECK_SMALL(p.first, tol); BOOST_CHECK_SMALL(p.second, tol); // (x-7)^2 = x^2 - 14*x + 49: - p = solve_quadratic(1, -14, 49); + p = quadratic_roots(1, -14, 49); BOOST_CHECK_CLOSE(p.first, double(7), tol); BOOST_CHECK_CLOSE(p.second, double(7), tol); } @@ -485,19 +485,19 @@ void test_solve_complex_quadratic() { using Real = typename Complex::value_type; Real tol = std::numeric_limits::epsilon(); - using boost::math::tools::solve_quadratic; - auto [x0, x1] = solve_quadratic({1,0}, {0,0}, {-1,0}); + using boost::math::tools::quadratic_roots; + auto [x0, x1] = quadratic_roots({1,0}, {0,0}, {-1,0}); BOOST_CHECK_CLOSE(x0.real(), Real(-1), tol); BOOST_CHECK_CLOSE(x1.real(), Real(1), tol); BOOST_CHECK_SMALL(x0.imag(), tol); BOOST_CHECK_SMALL(x1.imag(), tol); - auto p = solve_quadratic({7,0}, {0,0}, {0,0}); + auto p = quadratic_roots({7,0}, {0,0}, {0,0}); BOOST_CHECK_SMALL(p.first.real(), tol); BOOST_CHECK_SMALL(p.second.real(), tol); // (x-7)^2 = x^2 - 14*x + 49: - p = solve_quadratic({1,0}, {-14,0}, {49,0}); + p = quadratic_roots({1,0}, {-14,0}, {49,0}); BOOST_CHECK_CLOSE(p.first.real(), Real(7), tol); BOOST_CHECK_CLOSE(p.second.real(), Real(7), tol); From ae17d3a8a362136993df9ff488f31d91aff34dea Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Thu, 
27 Dec 2018 14:22:12 -0700 Subject: [PATCH 30/46] Use quadratic_roots to simplify M2M4 SNR estimator. [CI SKIP] --- doc/vector_functionals/signal_statistics.qbk | 13 +- .../boost/math/tools/signal_statistics.hpp | 136 ++++++------------ 2 files changed, 51 insertions(+), 98 deletions(-) diff --git a/doc/vector_functionals/signal_statistics.qbk b/doc/vector_functionals/signal_statistics.qbk index b66acccefd..6995c7bf68 100644 --- a/doc/vector_functionals/signal_statistics.qbk +++ b/doc/vector_functionals/signal_statistics.qbk @@ -45,9 +45,15 @@ namespace boost{ namespace math{ namespace tools { template auto oracle_snr_db(Container const & signal, Container const & noisy_signal); + template + auto m2m4_snr_estimator(ForwardIterator first, ForwardIterator last, decltype(*first) estimated_signal_kurtosis=1, decltype(*first) estimated_noise_kurtosis=3); + template auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimate_noise_kurtosis=3); + template + auto m2m4_snr_estimator_db(ForwardIterator first, ForwardIterator last, decltype(*first) estimated_signal_kurtosis=1, decltype(*first) estimated_noise_kurtosis=3); + template auto m2m4_snr_estimator_db(Container const & noisy_signal,typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimate_noise_kurtosis=3); @@ -162,7 +168,7 @@ See [@https://doi.org/10.1109/26.871393 Pauluzzi and N.C. Beaulieu] and [@https std::vector noisy_signal(512); // fill noisy_signal with data contaminated by Gaussian white noise: - double est_snr = boost::math::tools::m2m4_snr_estimator_db(noisy_signal); + double est_snr_db = boost::math::tools::m2m4_snr_estimator_db(noisy_signal); The /M/[sub 2]/M/[sub 4] SNR estimator is an "in-service" estimator, meaning that the estimate is made using the noisy, data-bearing signal, and does not require a background estimate. This estimator has been found to be work best between roughly -3 and 15db, tending to overestimate the noise below -3db, and underestimate the noise above 15db. @@ -200,11 +206,12 @@ and hence it should really be compared to the mean-invariant SNR. /Nota bene/: This computation requires the solution of a system of quadratic equations involving the noise kurtosis, the signal kurtosis, and the second and fourth moments of the data. There is no guarantee that a solution of this system exists for all value of these parameters, in fact nonexistence can easily be demonstrated for certain data. If there is no solution to the system, then failure is communicated by returning NaNs. +This happens distressingly often; if a user is aware of any blind SNR estimators which do not suffer from this drawback, please open a github ticket and let us know. The author has not managed to fully characterize the conditions under which a real solution with /S > 0/ and /N >0/ exists. However, a very intuitive example demonstrates why nonexistence can occur. -One case is where both the signal and noise kurtosis are assumed to be equal to three. -Then the method has no mechanism for distinguishing the signal from the noise, and the solution is non-unique. +Suppose the signal and noise kurtosis are equal. +Then the method has no way to distinguish between the signal and the noise, and the solution is non-unique. 
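+For the curious, the system is simple to write down in the notation of the implementation, assuming the signal and noise are independent and zero mean (the standard /M/[sub 2]/M/[sub 4] setup).
+Let /S/ denote the signal power, /N/ the noise power, and /k[sub a]/, /k[sub w]/ the signal and noise kurtoses.
+The second and fourth moments of the noisy signal then satisfy
+
+    M2 = S + N
+    M4 = ka*S^2 + 6*S*N + kw*N^2
+
+Eliminating /N/ = /M/[sub 2] - /S/ gives
+
+    (ka + kw - 6)*S^2 + 2*M2*(3 - kw)*S + (kw*M2^2 - M4) = 0
+
+which is the quadratic handed to `quadratic_roots`; the analogous equation in /N/ simply swaps the roles of /k[sub a]/ and /k[sub w]/.
+A negative discriminant, or roots which fail to give /S/ > 0 and /N/ = /M/[sub 2] - /S/ > 0, is exactly the nonexistence described above.
+(When /k[sub a]/ = /k[sub w]/ the two equations coincide, which is the non-uniqueness just mentioned.)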
[heading References] diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index ae0ad1fd3f..3f03f7352c 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -252,13 +253,12 @@ auto oracle_snr_db(Container const & signal, Container const & noisy_signal) // D. R. Pauluzzi and N. C. Beaulieu, "A comparison of SNR estimation techniques for the AWGN channel," IEEE Trans. Communications, Vol. 48, No. 10, pp. 1681-1691, 2000. // A nice python implementation: // https://github.com/gnuradio/gnuradio/blob/master/gr-digital/examples/snr_estimators.py - -template -auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3) +template +auto m2m4_snr_estimator(ForwardIterator first, ForwardIterator last, decltype(*first) estimated_signal_kurtosis=1, decltype(*first) estimated_noise_kurtosis=3) { BOOST_ASSERT_MSG(estimated_signal_kurtosis > 0, "The estimated signal kurtosis must be positive"); BOOST_ASSERT_MSG(estimated_noise_kurtosis > 0, "The estimated noise kurtosis must be positive."); - using Real = typename Container::value_type; + using Real = typename std::iterator_traits::value_type; using std::sqrt; if constexpr (std::is_floating_point::value || boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) @@ -272,7 +272,7 @@ auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::val // However, I can't prove that, so there is a chance that this does unnecessary work. // Future improvements: There are algorithms which can solve quadratics much more effectively than the naive implementation found here. // See: https://stackoverflow.com/questions/48979861/numerically-stable-method-for-solving-quadratic-equations/50065711#50065711 - auto [M1, M2, M3, M4] = boost::math::tools::first_four_moments(noisy_signal); + auto [M1, M2, M3, M4] = boost::math::tools::first_four_moments(first, last); if (M4 == 0) { // The signal is constant. There is no noise: @@ -287,120 +287,66 @@ auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::val Real cs = kw*M2*M2 - M4; Real bn = 2*M2*(3-ka); Real cn = ka*M2*M2 - M4; - Real N, S; - if(kw == 3) + auto [S0, S1] = boost::math::tools::quadratic_roots(a, bs, cs); + if (S1 > 0) { - if (ka == 3) + auto N = M2 - S1; + if (N > 0) { - // When ka = kw = 3, then either the system is inconsistent, or the system does not have a unique solution: - return std::numeric_limits::quiet_NaN(); + return S1/N; } - Real Ssq = -cs/a; - if (Ssq < 0) + if (S0 > 0) { - Real radicand = bn*bn - 4*a*cn; - if (radicand < 0) - { - return std::numeric_limits::quiet_NaN(); - } - N = (-bn + sqrt(radicand))/(2*a); - if (N < 0) - { - N = (-bn - sqrt(radicand))/(2*a); - if (N < 0) - { - return std::numeric_limits::quiet_NaN(); - } - S = M2 - N; - if (S < 0) - { - return std::numeric_limits::quiet_NaN(); - } - return S/N; - } - S = M2 - N; - if (S < 0) + N = M2 - S0; + if (N > 0) { - return std::numeric_limits::quiet_NaN(); + return S0/N; } - return S/N; - } - S = sqrt(Ssq); - N = M2 - S; - if (N < 0) - { - return std::numeric_limits::quiet_NaN(); } - return S/N; } - - // Maybe I should look for some very small distance from 6, but . . . 
- if (ka+kw == 6) + auto [N0, N1] = boost::math::tools::quadratic_roots(a, bn, cn); + if (N1 > 0) { - // In this case we don't need to solve a quadratic equation: - S = -cs/bs; - N = -cn/bn; - if (S/N < 0) + auto S = M2 - N1; + if (S > 0) { - return std::numeric_limits::quiet_NaN(); + return S/N1; } - return S/N; - } - - // The special cases have been taken care of. - // Now we must resort to solving a full quadratic. - Real radicand = bs*bs - 4*a*cs; - if (radicand < 0) - { - // See if we have a solution for N: - radicand = bn*bn - 4*a*cn; - if (radicand < 0) + if (N0 > 0) { - // Both S and N are complex: - return std::numeric_limits::quiet_NaN(); - } - // N is real. Can it be made positive? - N = (-bn + sqrt(radicand))/(2*a); - if (N < 0) - { - N = (-bn - sqrt(radicand))/(2*a); - if (N < 0) + S = M2 - N0; + if (S > 0) { - return std::numeric_limits::quiet_NaN(); + return S/N0; } } - S = M2 - N; - if (S < 0) - { - return std::numeric_limits::quiet_NaN(); - } - return S/N; } - - S = (-bs + sqrt(radicand))/(2*a); - if (S < 0) - { - S = (-bs - sqrt(radicand))/(2*a); - if (S < 0) - { - return std::numeric_limits::quiet_NaN(); - } - } - N = M2 - S; - if (N < 0) - { - return std::numeric_limits::quiet_NaN(); - } - return S/N; + // This happens distressingly often. It's a limitation of the method. + return std::numeric_limits::quiet_NaN(); } else { BOOST_ASSERT_MSG(false, "The M2M4 estimator has not been implemented for this type."); + return *first; } } template -auto m2m4_snr_estimator_db(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3) +inline auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3) +{ + return m2m4_snr_estimator(noisy_signal.cbegin(), noisy_signal.cend(), estimated_signal_kurtosis, estimated_noise_kurtosis); +} + +template +inline auto m2m4_snr_estimator_db(ForwardIterator first, ForwardIterator last, decltype(*first) estimated_signal_kurtosis=1, decltype(*first) estimated_noise_kurtosis=3) +{ + using std::log10; + return 10*log10(m2m4_snr_estimator(first, last, estimated_signal_kurtosis, estimated_noise_kurtosis)); +} + + +template +inline auto m2m4_snr_estimator_db(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3) { using std::log10; return 10*log10(m2m4_snr_estimator(noisy_signal, estimated_signal_kurtosis, estimated_noise_kurtosis)); From 7c82eb75cf9d45b3f11ba79e0bc46843b57c4824 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Thu, 27 Dec 2018 15:02:36 -0700 Subject: [PATCH 31/46] Test for scale invariance of M2M4 SNR estimator. 
[CI SKIP] --- test/signal_statistics_test.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/test/signal_statistics_test.cpp b/test/signal_statistics_test.cpp index bf247f2cf2..e2d05fbd41 100644 --- a/test/signal_statistics_test.cpp +++ b/test/signal_statistics_test.cpp @@ -18,6 +18,7 @@ #include #include +using std::abs; using boost::multiprecision::cpp_bin_float_50; using boost::multiprecision::cpp_complex_50; using boost::math::constants::two_pi; @@ -308,7 +309,6 @@ void test_oracle_snr() template void test_integer_oracle_snr() { - using std::abs; double tol = std::numeric_limits::epsilon(); size_t length = 100; std::vector signal(length, 1); @@ -342,12 +342,14 @@ void test_complex_oracle_snr() template void test_m2m4_snr_estimator() { + Real tol = std::numeric_limits::epsilon(); std::vector signal(5000, 1); std::vector x(signal.size()); std::mt19937 gen(18); std::normal_distribution dis{0, 1.0}; - for (size_t i = 0; i < x.size(); ++i) { + for (size_t i = 0; i < x.size(); ++i) + { signal[i] = 5*sin(100*6.28*i/x.size()); x[i] = signal[i] + dis(gen); } @@ -368,6 +370,18 @@ void test_m2m4_snr_estimator() oracle_snr_db = boost::math::tools::mean_invariant_oracle_snr_db(signal, x); // The performance depends on the exact numbers generated by the distribution, but this isn't bad: BOOST_TEST(abs(m2m4_db - oracle_snr_db) < 0.2); + + // The SNR estimator should be scale invariant. + // If x has snr y, then kx should have snr y. + Real ka = 1.5; + Real kw = 1.8; + auto m2m4 = boost::math::tools::m2m4_snr_estimator(x.begin(), x.end(), ka, kw); + for(size_t i = 0; i < x.size(); ++i) + { + x[i] *= 4096; + } + auto m2m4_2 = boost::math::tools::m2m4_snr_estimator(x.begin(), x.end(), ka, kw); + BOOST_TEST(abs(m2m4 - m2m4_2) < tol); } int main() From ff09a81d672f0c428d0515eeae46d6a87aaf223a Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Thu, 27 Dec 2018 18:21:07 -0700 Subject: [PATCH 32/46] Reduce the number of stack variables and hopefully increase the probability of inlining discriminant calculation [CI SKIP] --- include/boost/math/tools/roots.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index decdf19a32..bc6bc6e1a1 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -657,11 +657,11 @@ Complex complex_newton(F g, Complex guess, int max_iterations=std::numeric_limit namespace detail { template - inline T difference_of_products(T const & a, T const & b, T const & c, T const & d) + inline T discriminant(T const & a, T const & b, T const & c) { - T w = d*c; - T e = std::fma(-d, c, w); - T f = std::fma(a, b, -w); + T w = 4*a*c; + T e = std::fma(-c, 4*a, w); + T f = std::fma(b, b, -w); return f + e; } } @@ -698,7 +698,7 @@ auto quadratic_roots(T const& a, T const& b, T const& c) double x0 = sqrt(x0_sq); return std::pair(-x0,x0); } - double discriminant = detail::difference_of_products(double(b), double(b), 4.0*double(a), double(c)); + double discriminant = detail::discriminant(double(a), double(b), double(c)); if (discriminant < 0) { return std::pair(nan, nan); @@ -735,7 +735,7 @@ auto quadratic_roots(T const& a, T const& b, T const& c) T x0 = sqrt(x0_sq); return std::pair(-x0,x0); } - T discriminant = detail::difference_of_products(b, b, 4*a, c); + T discriminant = detail::discriminant(a, b, c); // Is there a sane way to flush very small negative values to zero? // If there is I don't know of it. 
if (discriminant < 0) From df447ae00d44f8701ad3d6fa6753c2202016483e Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Mon, 7 Jan 2019 10:50:06 -0700 Subject: [PATCH 33/46] Rename 'absolute_median' to 'median_absolute_deviation' [CI SKIP] --- doc/vector_functionals/signal_statistics.qbk | 22 +--- .../univariate_statistics.qbk | 29 ++++- .../boost/math/tools/signal_statistics.hpp | 33 +----- .../math/tools/univariate_statistics.hpp | 37 ++++++- test/signal_statistics_test.cpp | 103 ------------------ test/univariate_statistics_test.cpp | 72 ++++++++++++ 6 files changed, 139 insertions(+), 157 deletions(-) diff --git a/doc/vector_functionals/signal_statistics.qbk b/doc/vector_functionals/signal_statistics.qbk index 6995c7bf68..b7a483aa51 100644 --- a/doc/vector_functionals/signal_statistics.qbk +++ b/doc/vector_functionals/signal_statistics.qbk @@ -13,13 +13,7 @@ `` #include -namespace boost{ namespace math{ namespace tools { - - template - auto absolute_median(Container & c); - - template - auto absolute_median(ForwardIterator first, ForwardIterator last); +namespace boost::math::tools { template auto absolute_gini_coefficient(Container & c); @@ -57,7 +51,7 @@ namespace boost{ namespace math{ namespace tools { template auto m2m4_snr_estimator_db(Container const & noisy_signal,typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimate_noise_kurtosis=3); -}}} +} `` [heading Description] @@ -69,18 +63,6 @@ In general, you can store your data in an Eigen array, and Armadillo vector, `st These routines are usable in float, double, long double, and Boost.Multiprecision precision, as well as their complex extensions whenever the computation is well-defined. For certain operations (total variation, for example) integer inputs are supported. -[heading Absolute Median] - -The absolute median is used in signal processing, where the median of the magnitude of the coefficients in some expansion are used to estimate noise variance. -See [@https://wavelet-tour.github.io/ Mallat] for details. -The absolute median supports both real and complex arithmetic, modifies its input, and requires random access containers. - - std::vector v{-1, 1}; - double m = boost::math::tools::absolute_median(v); - // m = 1 - // Alternative syntax, using a subset of the container: - m = boost::math::tools::absolute_median(v.begin(), v.begin() + 1); - [heading Absolute Gini Coefficient] The Gini coefficient, first used to measure wealth inequality, is also one of the best measures of the sparsity of an expansion in a basis. 
diff --git a/doc/vector_functionals/univariate_statistics.qbk b/doc/vector_functionals/univariate_statistics.qbk index 6d448ad52c..92fede661f 100644 --- a/doc/vector_functionals/univariate_statistics.qbk +++ b/doc/vector_functionals/univariate_statistics.qbk @@ -63,6 +63,12 @@ namespace boost{ namespace math{ namespace tools { template auto median(ForwardIterator first, ForwardIterator last); + template + auto median_absolute_deviation(ForwardIterator first, ForwardIterator last, typename std::iterator_traits::value_type center=std::numeric_limits::quiet_NaN()); + + template + auto median_absolute_deviation(RandomAccessContainer v, typename RandomAccessContainer::value_type center=std::numeric_limits::quiet_NaN()); + template auto gini_coefficient(Container & c); @@ -169,7 +175,7 @@ Simultaneously computes the first four [@https://en.wikipedia.org/wiki/Central_m [heading Median] -Compute the median of a dataset: +Computes the median of a dataset: std::vector v{1,2,3,4,5}; double m = boost::math::tools::median(v.begin(), v.end()); @@ -179,6 +185,27 @@ The calculation of the median is a thin wrapper around the C++11 [@https://en.cp Therefore, all requirements of `std::nth_element` are inherited by the median calculation. In particular, the container must allow random access. +[heading Median Absolute Deviation] + +Computes the [@https://en.wikipedia.org/wiki/Median_absolute_deviation median absolute deviation] of a dataset: + + std::vector v{1,2,3,4,5}; + double mad = boost::math::tools::median_absolute_deviation(v); + +By default, the deviation from the median is used. +If you have some prior that the median is zero, or wish to compute the median absolute deviation from the mean, +use the following: + + // prior is that center is zero: + double center = 0; + double mad = boost::math::tools::median_absolute_deviation(v, center); + + // compute median absolute deviation from the mean: + double mu = boost::math::tools::mean(v); + double mad = boost::math::tools::median_absolute_deviation(v, mu); + +/Nota bene:/ The input vector is modified. +Again the vector is passed into a call to [@https://en.cppreference.com/w/cpp/algorithm/nth_element `nth_element`]. 
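The calls above are fragments; the following is a minimal, self-contained sketch (the dataset is illustrative and chosen so that its median is zero):

    #include <iostream>
    #include <vector>
    #include <boost/math/tools/univariate_statistics.hpp>

    int main()
    {
        // The median is 0 and the absolute deviations from it are {3, 1, 0, 1, 3},
        // so the median absolute deviation is 1:
        std::vector<double> v{-3, -1, 0, 1, 3};
        double mad = boost::math::tools::median_absolute_deviation(v);
        std::cout << "MAD = " << mad << "\n";

        // The same result, with the center passed explicitly:
        double mad0 = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0);
        std::cout << "MAD about zero = " << mad0 << "\n";
        return 0;
    }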
[heading Gini Coefficient] diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index 3f03f7352c..d2fbace685 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -15,36 +15,7 @@ #include -namespace boost{ namespace math{ namespace tools { - -template -auto absolute_median(RandomAccessIterator first, RandomAccessIterator last) -{ - using std::abs; - using RealOrComplex = typename std::iterator_traits::value_type; - size_t num_elems = std::distance(first, last); - BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined."); - auto comparator = [](RealOrComplex a, RealOrComplex b) { return abs(a) < abs(b);}; - if (num_elems & 1) - { - auto middle = first + (num_elems - 1)/2; - std::nth_element(first, middle, last, comparator); - return abs(*middle); - } - else - { - auto middle = first + num_elems/2 - 1; - std::nth_element(first, middle, last, comparator); - std::nth_element(middle, middle+1, last, comparator); - return (abs(*middle) + abs(*(middle+1)))/abs(static_cast(2)); - } -} - -template -inline auto absolute_median(RandomAccessContainer & v) -{ - return absolute_median(v.begin(), v.end()); -} +namespace boost::math::tools { template auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) @@ -352,5 +323,5 @@ inline auto m2m4_snr_estimator_db(Container const & noisy_signal, typename Cont return 10*log10(m2m4_snr_estimator(noisy_signal, estimated_signal_kurtosis, estimated_noise_kurtosis)); } -}}} +} #endif diff --git a/include/boost/math/tools/univariate_statistics.hpp b/include/boost/math/tools/univariate_statistics.hpp index 0c5b2dca89..8560f2d7f5 100644 --- a/include/boost/math/tools/univariate_statistics.hpp +++ b/include/boost/math/tools/univariate_statistics.hpp @@ -13,7 +13,7 @@ #include -namespace boost{ namespace math{ namespace tools { +namespace boost::math::tools { template auto mean(ForwardIterator first, ForwardIterator last) @@ -382,7 +382,40 @@ inline auto sample_gini_coefficient(RandomAccessContainer & v) return sample_gini_coefficient(v.begin(), v.end()); } +template +auto median_absolute_deviation(RandomAccessIterator first, RandomAccessIterator last, typename std::iterator_traits::value_type center=std::numeric_limits::value_type>::quiet_NaN()) +{ + using std::abs; + using Real = typename std::iterator_traits::value_type; + using std::isnan; + if (isnan(center)) + { + center = boost::math::tools::median(first, last); + } + size_t num_elems = std::distance(first, last); + BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined."); + auto comparator = [¢er](Real a, Real b) { return abs(a-center) < abs(b-center);}; + if (num_elems & 1) + { + auto middle = first + (num_elems - 1)/2; + std::nth_element(first, middle, last, comparator); + return abs(*middle); + } + else + { + auto middle = first + num_elems/2 - 1; + std::nth_element(first, middle, last, comparator); + std::nth_element(middle, middle+1, last, comparator); + return (abs(*middle) + abs(*(middle+1)))/abs(static_cast(2)); + } +} +template +inline auto median_absolute_deviation(RandomAccessContainer & v, typename RandomAccessContainer::value_type center=std::numeric_limits::quiet_NaN()) +{ + return median_absolute_deviation(v.begin(), v.end(), center); +} -}}} + +} #endif diff --git a/test/signal_statistics_test.cpp b/test/signal_statistics_test.cpp index e2d05fbd41..330b629f58 100644 --- a/test/signal_statistics_test.cpp +++ 
b/test/signal_statistics_test.cpp @@ -33,99 +33,6 @@ using boost::math::constants::two_pi; * 6) Does it work with integer data if sensible? */ -template -void test_absolute_median() -{ - std::vector v{-1, 2, -3, 4, -5, 6, -7}; - - Real m = boost::math::tools::absolute_median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 4); - - std::mt19937 g(12); - std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::absolute_median(v); - BOOST_TEST_EQ(m, 4); - - v = {1, -2, -3, 3, -4, -5}; - m = boost::math::tools::absolute_median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 3); - std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::absolute_median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 3); - - v = {-1}; - m = boost::math::tools::absolute_median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 1); - - v = {-1, 1}; - m = boost::math::tools::absolute_median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 1); - - v = {2, -4}; - m = boost::math::tools::absolute_median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 3); - - v = {1, -1, 1}; - m = boost::math::tools::absolute_median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 1); - - v = {1, 2, -3}; - m = boost::math::tools::absolute_median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 2); - std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::absolute_median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 2); - - std::array w{1, 2, -3}; - m = boost::math::tools::absolute_median(w); - BOOST_TEST_EQ(m, 2); - - // boost.ublas vector? - boost::numeric::ublas::vector u(6); - u[0] = 1; - u[1] = 2; - u[2] = -3; - u[3] = 1; - u[4] = 2; - u[5] = -3; - m = boost::math::tools::absolute_median(u); - BOOST_TEST_EQ(m, 2); -} - - -template -void test_complex_absolute_median() -{ - typedef typename Complex::value_type Real; - std::mt19937 g(18); - std::vector v{{0,1}, {0,-2},{0,3}, {0,-4}, {0,5}, {0,-6}, {0,7}}; - - Real m = boost::math::tools::absolute_median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 4); - - std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::absolute_median(v); - BOOST_TEST_EQ(m, 4); - - v = {{0,1}, {0,-2}, {0,-3}, {0,3}, {0,4}, {0,-5}}; - m = boost::math::tools::absolute_median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 3); - std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::absolute_median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 3); - - v = {{0, -1}}; - m = boost::math::tools::absolute_median(v.begin(), v.end()); - BOOST_TEST_EQ(m, 1); - - boost::numeric::ublas::vector w(1); - w[0] = {0, -1}; - m = boost::math::tools::absolute_median(w); - BOOST_TEST_EQ(m, 1); - -} - - template void test_hoyer_sparsity() { @@ -386,16 +293,6 @@ void test_m2m4_snr_estimator() int main() { - test_absolute_median(); - test_absolute_median(); - test_absolute_median(); - test_absolute_median(); - - test_complex_absolute_median>(); - test_complex_absolute_median>(); - test_complex_absolute_median>(); - test_complex_absolute_median(); - test_absolute_gini_coefficient(); test_absolute_gini_coefficient(); test_absolute_gini_coefficient(); diff --git a/test/univariate_statistics_test.cpp b/test/univariate_statistics_test.cpp index a4baa0e9b2..c247ed013f 100644 --- a/test/univariate_statistics_test.cpp +++ b/test/univariate_statistics_test.cpp @@ -347,6 +347,73 @@ void test_median() BOOST_TEST_EQ(m, 2); } +template +void test_median_absolute_deviation() +{ + std::vector v{-1, 2, -3, 4, -5, 6, -7}; + + Real m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 4); + + std::mt19937 g(12); + std::shuffle(v.begin(), v.end(), g); + m = 
boost::math::tools::median_absolute_deviation(v, 0); + BOOST_TEST_EQ(m, 4); + + v = {1, -2, -3, 3, -4, -5}; + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 3); + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 3); + + v = {-1}; + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 1); + + v = {-1, 1}; + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 1); + // The median is zero, so coincides with the default: + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end()); + BOOST_TEST_EQ(m, 1); + + m = boost::math::tools::median_absolute_deviation(v); + BOOST_TEST_EQ(m, 1); + + + v = {2, -4}; + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 3); + + v = {1, -1, 1}; + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 1); + + v = {1, 2, -3}; + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 2); + std::shuffle(v.begin(), v.end(), g); + m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + BOOST_TEST_EQ(m, 2); + + std::array w{1, 2, -3}; + m = boost::math::tools::median_absolute_deviation(w, 0); + BOOST_TEST_EQ(m, 2); + + // boost.ublas vector? + boost::numeric::ublas::vector u(6); + u[0] = 1; + u[1] = 2; + u[2] = -3; + u[3] = 1; + u[4] = 2; + u[5] = -3; + m = boost::math::tools::median_absolute_deviation(u, 0); + BOOST_TEST_EQ(m, 2); +} + + template void test_sample_gini_coefficient() { @@ -471,6 +538,11 @@ int main() test_median(); test_median(); + test_median_absolute_deviation(); + test_median_absolute_deviation(); + test_median_absolute_deviation(); + test_median_absolute_deviation(); + test_gini_coefficient(); test_gini_coefficient(); test_gini_coefficient(); From 8d267da5e1bd59ccc612087ff7f4c50c2d70b3cc Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Fri, 11 Jan 2019 14:23:39 -0700 Subject: [PATCH 34/46] Add hamming_distance, l1_distance, l2_distance, sup_distance, lp_distance. Add more tests for integer datatypes. 
[CI SKIP] --- doc/vector_functionals/norms.qbk | 131 +++++++++-- include/boost/math/tools/norms.hpp | 341 +++++++++++++++++++++++++++-- test/norms_test.cpp | 217 +++++++++++++++++- 3 files changed, 655 insertions(+), 34 deletions(-) diff --git a/doc/vector_functionals/norms.qbk b/doc/vector_functionals/norms.qbk index e8e9183c16..b809540b16 100644 --- a/doc/vector_functionals/norms.qbk +++ b/doc/vector_functionals/norms.qbk @@ -21,18 +21,36 @@ namespace boost{ namespace math{ namespace tools { template auto l0_pseudo_norm(ForwardIterator first, ForwardIterator last); + template + size_t hamming_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2); + + template + size_t hamming_distance(Container const & u, Container const & v); + template auto l1_norm(Container const & c); template auto l1_norm(ForwardIterator first, ForwardIterator last); + template + auto l1_distance(Container const & v1, Container const & v2); + + template + auto l1_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2); + template auto l2_norm(Container const & c); template auto l2_norm(ForwardIterator first, ForwardIterator last); + template + auto l2_distance(Container const & v1, Container const & v2); + + template + auto l2_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2); + template auto sup_norm(Container const & c); @@ -40,10 +58,22 @@ namespace boost{ namespace math{ namespace tools { auto sup_norm(ForwardIterator first, ForwardIterator last); template - auto lp_norm(Container const & c); + auto sup_distance(Container const & v1, Container const & v2); template - auto lp_norm(ForwardIterator first, ForwardIterator last, p); + auto sup_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2); + + template + auto lp_norm(Container const & c, unsigned p); + + template + auto lp_norm(ForwardIterator first, ForwardIterator last, unsigned p); + + template + auto lp_distance(Container const & v1, Container const & v2, unsigned p); + + template + auto lp_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2, unsigned p); template auto total_variation(Container const & c); @@ -59,9 +89,9 @@ namespace boost{ namespace math{ namespace tools { The file `boost/math/tools/norms.hpp` is a set of facilities for computing scalar values traditionally useful in numerical analysis from vectors. Our examples use `std::vector` to hold the data, but this not required. -In general, you can store your data in an Eigen array, and Armadillo vector, `std::array`, and for many of the routines, a `std::forward_list`. +In general, you can store your data in an Eigen array, an Armadillo vector, `std::array`, and for many of the routines, a `std::forward_list`. These routines are usable in float, double, long double, and Boost.Multiprecision precision, as well as their complex extensions whenever the computation is well-defined. -For certain operations (total variation, for example) integer inputs are supported. +Integral datatypes are supported for most routines. [heading \u2113[super \u221E] norm] @@ -72,12 +102,27 @@ Computes the supremum norm of a dataset: // sup = 3 std::vector> v{{0, -8}, {1,1}, {-3,2}}; - double sup = boost::math::tools::sup_norm(v.cbegin(), v.cend()); + // Range call: + double sup = boost::math::tools::sup_norm(v); // sup = 8 Supports real, integral, and complex arithmetic. Container must be forward iterable and is not modified. 
+[heading \u2113[super \u221E] distance] + +Computes the supremum norm distance between two vectors: + + std::vector v{-3, 2, 1}; + std::vector w{6, -2, 1}; + double sup = boost::math::tools::sup_distance(w, v); + // sup = 9 + +Supports real, integral, and complex arithmetic. +Container must be forward iterable and is not modified. +If the input it integral, the output is a double precision float. + + [heading \u2113[super /p/] norm] std::vector v{-8, 0, 0}; @@ -85,12 +130,32 @@ Container must be forward iterable and is not modified. // sup = 8 std::vector> v{{1, 0}, {0,1}, {0,-1}}; - double sup = boost::math::tools::sup_norm(v.cbegin(), v.cend(), 3); + double sup = boost::math::tools::lp_norm(v.cbegin(), v.cend(), 3); // sup = cbrt(3) -Supports both real and complex arithmetic. +Supports both real, integral, and complex arithmetic. +If the input is integral, the output is a double precision float. The container must be forward iterable and the contents are not modified. +Only supports integral /p/ for two reasons: The computation is much slower for real /p/, and the non-integral \u2113[super /p/] norm is rarely used. + +[heading \u2113[super /p/] distance] + + std::vector v{-8, 0, 0}; + std::vector w{8, 0, 0}; + double dist = boost::math::tools::lp_distance(v, w, 7); + // dist = 16 + + std::vector> v{{1, 0}, {0,1}, {0,-1}}; + double dist = boost::math::tools::lp_distance(v, v, 3); + // dist = 0 + +Supports both real, integral, and complex arithmetic. +If the input is integral, the output is a double precision float. +The container must be forward iterable and the contents are not modified. + +Only supports integer /p/. + [heading \u2113[super 0] pseudo-norm] Counts the number of non-zero elements in a container. @@ -102,7 +167,18 @@ Counts the number of non-zero elements in a container. Supports real, integral, and complex numbers. The container must be forward iterable and the contents are not modified. Note that this measure is not robust against numerical noise and is therefore not as useful as (say) the Hoyer sparsity in numerical applications. -Works will real, complex, and integral inputs. +Works with real, complex, and integral inputs. + +[heading Hamming Distance] + +Compute the number of non-equal elements between two vectors /w/ and /v/: + + std::vector v{0,0,1}; + std::vector w{1,0,0}; + size_t count = boost::math::tools::hamming_distance(w, v); + // count = 2 + +Works for any datatype for which the operator `!=` is defined. [heading \u2113[super 1] norm] @@ -114,15 +190,43 @@ The \u2113[super 1] norm is a special case of the \u2113[super /p/] norm, but is Requires a forward iterable input, does not modify input data, and works with real, integral, and complex numbers. +[heading \u2113[super 1] distance] + +Computes the \u2113[super 1] distance between two vectors: + + std::vector v{1,1,1}; + std::vector w{1,1,1}; + double dist = boost::math::tools::l1_distance(w, v); + // dist = 0 + +Requires a forward iterable inputs, does not modify input data, and works with real, integral, and complex numbers. +If the input type is integral, the output is a double precision float. 
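+The snippets above are fragments; here is one compilable sketch exercising several of the distance functions on the same pair of vectors (the data is illustrative):
+
+    #include <iostream>
+    #include <vector>
+    #include <boost/math/tools/norms.hpp>
+
+    int main()
+    {
+        std::vector<double> u{1, 2, 3};
+        std::vector<double> v{1, 0, 5};
+
+        // Two coordinates differ:
+        std::cout << boost::math::tools::hamming_distance(u, v) << "\n"; // 2
+        // |1-1| + |2-0| + |3-5| = 4:
+        std::cout << boost::math::tools::l1_distance(u, v) << "\n";      // 4
+        // (2^3 + 2^3)^(1/3) = 2*cbrt(2):
+        std::cout << boost::math::tools::lp_distance(u, v, 3) << "\n";   // ~2.5198
+        return 0;
+    }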
+ + [heading \u2113[super 2] norm] The \u2113[super 2] norm is again a special case of the \u2113[super /p/] norm, but is much faster: std::vector v{1,1,1}; - double l1 = boost::math::tools::l2_norm(v.begin(), v.end()); - // l1 = sqrt(3) + double l2 = boost::math::tools::l2_norm(v.begin(), v.end()); + // l2 = sqrt(3) + +Requires a forward iterable input, does not modify input data, and works with real, complex and integral data. +If the input is integral, the output is a double precision float. + + +[heading \u2113[super 2] distance] + +Compute the \u2113[super 2] distance between two vectors /w/ and /v/: + + std::vector v{1,1,1}; + std::vector w{1,2,1}; + double dist = boost::math::tools::l2_distance(w, v); + // dist = 1 + +Requires a forward iterable input, does not modify input data, and works with real, complex numbers, and integral data. +If the input type is integral, the output is a double precision float. -Requires a forward iterable input, does not modify input data, and works with real and complex numbers. [heading Total Variation] @@ -133,9 +237,11 @@ Requires a forward iterable input, does not modify input data, and works with re double tv = boost::math::tools::total_variation(v.begin(), v.end()); // variation is 1, so tv = 1. std::vector v{1,1,1}; - int tv = boost::math::tools::total_variation(v); + double tv = boost::math::tools::total_variation(v); The total variation only supports real numbers and integers. +If the input is integral, the output is a double precision float. + All the constituent operations to compute the total variation are well-defined for complex numbers, but the computed result is not meaningful; a 2D total variation is more appropriate. The container must be forward iterable, and the contents are not modified. @@ -148,7 +254,6 @@ However, it satisfies the triangle inequality and is absolutely 1-homogeneous, s * Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002. * Mallat, Stephane. ['A wavelet tour of signal processing: the sparse way.] Academic press, 2008. * Hurley, Niall, and Scott Rickard. ['Comparing measures of sparsity.] IEEE Transactions on Information Theory 55.10 (2009): 4723-4741. -* Jensen, Arne, and Anders la Cour-Harbo. ['Ripples in mathematics: the discrete wavelet transform.] Springer Science & Business Media, 2001. 
[endsect] [/section:norms Norms] diff --git a/include/boost/math/tools/norms.hpp b/include/boost/math/tools/norms.hpp index 449d2eb530..1533426a34 100644 --- a/include/boost/math/tools/norms.hpp +++ b/include/boost/math/tools/norms.hpp @@ -5,7 +5,6 @@ #ifndef BOOST_MATH_TOOLS_NORMS_HPP #define BOOST_MATH_TOOLS_NORMS_HPP - #include #include #include @@ -13,7 +12,7 @@ #include -namespace boost{ namespace math{ namespace tools { +namespace boost::math::tools { // Mallat, "A Wavelet Tour of Signal Processing", equation 2.60: template @@ -22,12 +21,12 @@ auto total_variation(ForwardIterator first, ForwardIterator last) using Real = typename std::iterator_traits::value_type; using std::abs; BOOST_ASSERT_MSG(first != last && std::next(first) != last, "At least two samples are required to compute the total variation."); - Real tv = 0; auto it = first; Real tmp = *it; if constexpr (std::is_unsigned::value) { + double tv = 0; while (++it != last) { if (*it > tmp) @@ -44,6 +43,7 @@ auto total_variation(ForwardIterator first, ForwardIterator last) } else { + Real tv = 0; while (++it != last) { tv += abs(*it - tmp); @@ -163,6 +163,16 @@ auto l2_norm(ForwardIterator first, ForwardIterator last) } return result; } + else + { + double l2 = 0; + for (auto it = first; it != last; ++it) + { + double tmp = *it; + l2 += tmp*tmp; + } + return sqrt(l2); + } } template @@ -193,8 +203,31 @@ inline size_t l0_pseudo_norm(Container const & v) } template -auto lp_norm(ForwardIterator first, ForwardIterator last, typename std::remove_const())>::type>::type p) +size_t hamming_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2) +{ + size_t count = 0; + auto it1 = first1; + auto it2 = first2; + while (it1 != last1) + { + if (*it1++ != *it2++) + { + ++count; + } + } + return count; +} + +template +inline size_t hamming_distance(Container const & v, Container const & w) +{ + return hamming_distance(v.cbegin(), v.cend(), w.cbegin()); +} + +template +auto lp_norm(ForwardIterator first, ForwardIterator last, unsigned p) { + using std::abs; using std::pow; using std::is_floating_point; using std::isfinite; @@ -202,25 +235,24 @@ auto lp_norm(ForwardIterator first, ForwardIterator last, typename std::remove_c if constexpr (boost::is_complex::value || boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) { - BOOST_ASSERT_MSG(p.real() >= 0, "For p < 0, the lp norm is not a norm."); - BOOST_ASSERT_MSG(p.imag() == 0, "For imaginary p, the lp norm is not a norm."); using std::norm; - decltype(p.real()) lp = 0; + using Real = typename RealOrComplex::value_type; + Real lp = 0; for (auto it = first; it != last; ++it) { - lp += pow(norm(*it), p.real()/2); + lp += pow(abs(*it), p); } - auto result = pow(lp, 1/p.real()); + auto result = pow(lp, Real(1)/Real(p)); if (!isfinite(result)) { auto a = boost::math::tools::sup_norm(first, last); - decltype(p.real()) lp = 0; + Real lp = 0; for (auto it = first; it != last; ++it) { - lp += pow(abs(*it)/a, p.real()); + lp += pow(abs(*it)/a, p); } - result = a*pow(lp, 1/p.real()); + result = a*pow(lp, Real(1)/Real(p)); } return result; } @@ -235,7 +267,7 @@ auto lp_norm(ForwardIterator first, ForwardIterator last, typename std::remove_c lp += pow(abs(*it), p); } - RealOrComplex result = pow(lp, 1/p); + RealOrComplex result = pow(lp, RealOrComplex(1)/RealOrComplex(p)); if (!isfinite(result)) { RealOrComplex a = boost::math::tools::sup_norm(first, last); @@ -244,21 +276,296 @@ auto lp_norm(ForwardIterator first, ForwardIterator last, 
typename std::remove_c { lp += pow(abs(*it)/a, p); } - result = a*pow(lp, 1/p); + result = a*pow(lp, RealOrComplex(1)/RealOrComplex(p)); } return result; } else { - BOOST_ASSERT_MSG(false, "Unable to determine if the input type is real or complex."); + double lp = 0; + + for (auto it = first; it != last; ++it) + { + lp += pow(abs(*it), p); + } + + double result = pow(lp, 1.0/double(p)); + if (!isfinite(result)) + { + double a = boost::math::tools::sup_norm(first, last); + lp = 0; + for (auto it = first; it != last; ++it) + { + lp += pow(abs(*it)/a, p); + } + result = a*pow(lp, double(1)/double(p)); + } + return result; } } template -inline auto lp_norm(Container const & v, typename Container::value_type p) +inline auto lp_norm(Container const & v, unsigned p) { return lp_norm(v.cbegin(), v.cend(), p); } -}}} + +template +auto lp_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2, unsigned p) +{ + using std::pow; + using std::abs; + using std::is_floating_point; + using std::isfinite; + using RealOrComplex = typename std::iterator_traits::value_type; + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + using Real = typename RealOrComplex::value_type; + using std::norm; + Real dist = 0; + auto it1 = first1; + auto it2 = first2; + while(it1 != last1) + { + auto tmp = *it1++ - *it2++; + dist += pow(abs(tmp), p); + } + return pow(dist, Real(1)/Real(p)); + } + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + RealOrComplex dist = 0; + auto it1 = first1; + auto it2 = first2; + while(it1 != last1) + { + auto tmp = *it1++ - *it2++; + dist += pow(abs(tmp), p); + } + + return pow(dist, RealOrComplex(1)/RealOrComplex(p)); + } + else + { + BOOST_ASSERT_MSG(p >= 0, "For p < 0, the lp norm is not a norm"); + double dist = 0; + + auto it1 = first1; + auto it2 = first2; + while(it1 != last1) + { + double tmp = *it1++ - *it2++; + dist += pow(abs(tmp), p); + } + return pow(dist, 1.0/double(p)); + } +} + +template +inline auto lp_distance(Container const & v, Container const & w, unsigned p) +{ + return lp_distance(v.cbegin(), v.cend(), w.cbegin(), p); +} + + +template +auto l1_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2) +{ + using std::abs; + using std::is_floating_point; + using std::isfinite; + using T = typename std::iterator_traits::value_type; + auto it1 = first1; + auto it2 = first2; + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + using Real = typename T::value_type; + Real sum = 0; + while (it1 != last1) { + sum += abs(*it1++ - *it2++); + } + return sum; + } + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + T sum = 0; + while (it1 != last1) + { + sum += abs(*it1++ - *it2++); + } + return sum; + } + else + { + // Why choose double precision? + // First, consistency: l2 and lp distance cannot be returned as floating point types. + // Second, if the type is a small integer type (like uint8_t), then the result will overflow. 
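// Note: the comparison below keeps each difference non-negative before it is accumulated,
// so unsigned integer types cannot wrap around on subtraction.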
+ double sum = 0; + while(it1 != last1) + { + T x1 = *it1++; + T x2 = *it2++; + if (x1 > x2) + { + sum += (x1 - x2); + } + else + { + sum += (x2 - x1); + } + } + return sum; + } +} + +template +auto l1_distance(Container const & v, Container const & w) +{ + using std::size; + BOOST_ASSERT_MSG(size(v) == size(w), + "L1 distance requires both containers to have the same number of elements"); + return l1_distance(v.cbegin(), v.cend(), w.begin()); +} + +template +auto l2_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2) +{ + using std::abs; + using std::norm; + using std::sqrt; + using std::is_floating_point; + using std::isfinite; + using T = typename std::iterator_traits::value_type; + auto it1 = first1; + auto it2 = first2; + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + using Real = typename T::value_type; + Real sum = 0; + while (it1 != last1) { + sum += norm(*it1++ - *it2++); + } + return sqrt(sum); + } + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + T sum = 0; + while (it1 != last1) + { + T tmp = *it1++ - *it2++; + sum += tmp*tmp; + } + return sqrt(sum); + } + else // integral values: + { + double sum = 0; + while(it1 != last1) + { + T x1 = *it1++; + T x2 = *it2++; + if (x1 > x2) + { + double tmp = x1-x2; + sum += tmp*tmp; + } + else + { + double tmp = x2 - x1; + sum += tmp*tmp; + } + } + return sqrt(sum); + } +} + +template +auto l2_distance(Container const & v, Container const & w) +{ + using std::size; + BOOST_ASSERT_MSG(size(v) == size(w), + "L2 distance requires both containers to have the same number of elements"); + return l2_distance(v.cbegin(), v.cend(), w.begin()); +} + +template +auto sup_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator first2) +{ + using std::abs; + using std::norm; + using std::sqrt; + using std::is_floating_point; + using std::isfinite; + using T = typename std::iterator_traits::value_type; + auto it1 = first1; + auto it2 = first2; + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + using Real = typename T::value_type; + Real sup_sq = 0; + while (it1 != last1) { + Real tmp = norm(*it1++ - *it2++); + if (tmp > sup_sq) { + sup_sq = tmp; + } + } + return sqrt(sup_sq); + } + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + T sup = 0; + while (it1 != last1) + { + T tmp = *it1++ - *it2++; + if (sup < abs(tmp)) + { + sup = abs(tmp); + } + } + return sup; + } + else // integral values: + { + double sup = 0; + while(it1 != last1) + { + T x1 = *it1++; + T x2 = *it2++; + double tmp; + if (x1 > x2) + { + tmp = x1-x2; + } + else + { + tmp = x2 - x1; + } + if (sup < tmp) { + sup = tmp; + } + } + return sup; + } +} + +template +auto sup_distance(Container const & v, Container const & w) +{ + using std::size; + BOOST_ASSERT_MSG(size(v) == size(w), + "sup distance requires both containers to have the same number of elements"); + return sup_distance(v.cbegin(), v.cend(), w.begin()); +} + + +} #endif diff --git a/test/norms_test.cpp b/test/norms_test.cpp index af7a89d582..bffe2b6c22 100644 --- a/test/norms_test.cpp +++ b/test/norms_test.cpp @@ -4,7 +4,7 @@ * Boost Software License, Version 1.0. 
(See accompanying file * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */ - +#include #include #include #include @@ -17,6 +17,9 @@ #include #include +using std::abs; +using std::pow; +using std::sqrt; using boost::multiprecision::cpp_bin_float_50; using boost::multiprecision::cpp_complex_50; @@ -76,14 +79,55 @@ void test_complex_lp() l3 = boost::math::tools::lp_norm(v, 3); BOOST_TEST(abs(l3 - 1) < tol); +} + +template +void test_integer_lp() +{ + double tol = 50*std::numeric_limits::epsilon(); + + std::array u{1,0,0}; + double l3 = boost::math::tools::lp_norm(u.begin(), u.end(), 3); + BOOST_TEST(abs(l3 - 1) < tol); +} + +template +void test_lp_distance() +{ + Real tol = 50*std::numeric_limits::epsilon(); + + std::vector u{1,0,0}; + std::vector v{0,0,0}; + + Real dist = boost::math::tools::lp_distance(u,u, 3); + BOOST_TEST(abs(dist) < tol); + dist = boost::math::tools::lp_distance(u,v, 3); + BOOST_TEST(abs(dist - 1) < tol); } +template +void test_complex_lp_distance() +{ + using Real = typename Complex::value_type; + Real tol = 50*std::numeric_limits::epsilon(); + + std::vector u{{1,0},{0,0},{0,0}}; + std::vector v{{0,0},{0,0},{0,0}}; + + Real dist = boost::math::tools::lp_distance(u,u, 3); + BOOST_TEST(abs(dist) < tol); + + dist = boost::math::tools::lp_distance(u,v, 3); + BOOST_TEST(abs(dist - 1) < tol); +} + + template void test_integer_total_variation() { std::vector v{1,1}; - Z tv = boost::math::tools::total_variation(v); + double tv = boost::math::tools::total_variation(v); BOOST_TEST_EQ(tv,0); v[1] = 2; @@ -163,7 +207,6 @@ void test_sup_norm() std::array w{-2,1,0}; s = boost::math::tools::sup_norm(w); BOOST_TEST(abs(s - 2) < tol); - } template @@ -218,7 +261,18 @@ void test_complex_l0_pseudo_norm() count = boost::math::tools::l0_pseudo_norm(v); BOOST_TEST_EQ(count, 1); +} +template +void test_hamming_distance() +{ + std::vector v{1,2,3}; + std::vector w{1,2,4}; + size_t count = boost::math::tools::hamming_distance(v, w); + BOOST_TEST_EQ(count, 1); + + count = boost::math::tools::hamming_distance(v, v); + BOOST_TEST_EQ(count, 0); } template @@ -256,9 +310,55 @@ void test_complex_l1_norm() l1 = boost::math::tools::l1_norm(v); BOOST_TEST(abs(l1 - 3) < tol); +} + +template +void test_l1_distance() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,2,3}; + std::vector w{1,1,1}; + Real l1 = boost::math::tools::l1_distance(v, v); + BOOST_TEST(abs(l1) < tol); + + l1 = boost::math::tools::l1_distance(w, v); + BOOST_TEST(abs(l1 - 3) < tol); + l1 = boost::math::tools::l1_distance(v, w); + BOOST_TEST(abs(l1 - 3) < tol); } +template +void test_integer_l1_distance() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,2,3}; + std::vector w{1,1,1}; + double l1 = boost::math::tools::l1_distance(v, v); + BOOST_TEST(abs(l1) < tol); + + l1 = boost::math::tools::l1_distance(w, v); + BOOST_TEST(abs(l1 - 3) < tol); + + l1 = boost::math::tools::l1_distance(v, w); + BOOST_TEST(abs(l1 - 3) < tol); +} + +template +void test_complex_l1_distance() +{ + typedef typename Complex::value_type Real; + Real tol = std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,1},{0,-1}}; + Real l1 = boost::math::tools::l1_distance(v, v); + BOOST_TEST(abs(l1) < tol); + + std::vector w{{2,0}, {0,1},{0,-1}}; + l1 = boost::math::tools::l1_distance(v.cbegin(), v.cend(), w.cbegin()); + BOOST_TEST(abs(l1 - 1) < tol); +} + + template void test_l2_norm() { @@ -284,10 +384,18 @@ void test_l2_norm() BOOST_TEST(abs(l2 - bignum) < tol*l2); } +template +void test_integer_l2_norm() +{ + 
double tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1}; + double l2 = boost::math::tools::l2_norm(v.begin(), v.end()); + BOOST_TEST(abs(l2 - 2) < tol); +} + template void test_complex_l2_norm() { - using std::sqrt; typedef typename Complex::value_type Real; Real tol = 100*std::numeric_limits::epsilon(); std::vector v{{1,0}, {0,1},{0,-1}, {1,0}}; @@ -296,9 +404,74 @@ void test_complex_l2_norm() l2 = boost::math::tools::l2_norm(v); BOOST_TEST(abs(l2 - 2) < tol); +} + +template +void test_l2_distance() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1}; + Real l2 = boost::math::tools::l2_distance(v, v); + BOOST_TEST(abs(l2) < tol); +} + + +template +void test_integer_l2_distance() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1}; + double l2 = boost::math::tools::l2_distance(v, v); + BOOST_TEST(abs(l2) < tol); +} + +template +void test_complex_l2_distance() +{ + typedef typename Complex::value_type Real; + Real tol = 100*std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,1},{0,-1}, {1,0}}; + Real l2 = boost::math::tools::l2_distance(v, v); + BOOST_TEST(abs(l2) < tol); +} +template +void test_sup_distance() +{ + Real tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1}; + std::vector w{0,0,0,0}; + Real sup = boost::math::tools::sup_distance(v, v); + BOOST_TEST(abs(sup) < tol); + sup = boost::math::tools::sup_distance(v, w); + BOOST_TEST(abs(sup -1) < tol); } + +template +void test_integer_sup_distance() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,1,1,1}; + std::vector w{0,0,0,0}; + double sup = boost::math::tools::sup_distance(v, v); + BOOST_TEST(abs(sup) < tol); + + sup = boost::math::tools::sup_distance(v, w); + BOOST_TEST(abs(sup -1) < tol); +} + +template +void test_complex_sup_distance() +{ + typedef typename Complex::value_type Real; + Real tol = 100*std::numeric_limits::epsilon(); + std::vector v{{1,0}, {0,1},{0,-1}, {1,0}}; + Real l2 = boost::math::tools::sup_distance(v, v); + BOOST_TEST(abs(l2) < tol); +} + + int main() { test_lp(); @@ -311,6 +484,14 @@ int main() test_complex_lp>(); test_complex_lp(); + test_integer_lp(); + + test_lp_distance(); + test_lp_distance(); + + test_complex_lp_distance>(); + test_complex_lp_distance(); + test_sup_norm(); test_sup_norm(); test_sup_norm(); @@ -323,6 +504,14 @@ int main() test_complex_sup_norm>(); test_complex_sup_norm(); + test_sup_distance(); + test_sup_distance(); + + test_integer_sup_distance(); + + test_complex_sup_distance>(); + test_complex_sup_distance(); + test_l0_pseudo_norm(); test_l0_pseudo_norm(); test_l0_pseudo_norm(); @@ -334,6 +523,8 @@ int main() test_complex_l0_pseudo_norm>(); test_complex_l0_pseudo_norm(); + test_hamming_distance(); + test_l1_norm(); test_l1_norm(); test_l1_norm(); @@ -346,6 +537,14 @@ int main() test_complex_l1_norm>(); test_complex_l1_norm(); + test_l1_distance(); + test_l1_distance(); + + test_integer_l1_distance(); + + test_complex_l1_distance>(); + test_complex_l1_distance(); + test_complex_l2_norm>(); test_complex_l2_norm>(); test_complex_l2_norm>(); @@ -356,6 +555,16 @@ int main() test_l2_norm(); test_l2_norm(); + test_integer_l2_norm(); + + test_l2_distance(); + test_l2_distance(); + + test_integer_l2_distance(); + + test_complex_l2_distance>(); + test_complex_l2_distance(); + test_total_variation(); test_total_variation(); test_total_variation(); From fb4f7748cd0a9f7a4a3b92529dc9778cee781a03 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Fri, 11 Jan 2019 15:19:48 -0700 Subject: [PATCH 35/46] 
Refactor kurtosis calculation to use first_four_moments. --- .../math/tools/univariate_statistics.hpp | 98 +++++-------------- test/univariate_statistics_test.cpp | 2 +- 2 files changed, 25 insertions(+), 75 deletions(-) diff --git a/include/boost/math/tools/univariate_statistics.hpp b/include/boost/math/tools/univariate_statistics.hpp index 8560f2d7f5..0886956df8 100644 --- a/include/boost/math/tools/univariate_statistics.hpp +++ b/include/boost/math/tools/univariate_statistics.hpp @@ -172,13 +172,13 @@ inline auto skewness(Container const & v) return skewness(v.cbegin(), v.cend()); } -// Follows equation 1.6 of: +// Follows equation 1.5/1.6 of: // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf template -auto kurtosis(ForwardIterator first, ForwardIterator last) +auto first_four_moments(ForwardIterator first, ForwardIterator last) { using Real = typename std::iterator_traits::value_type; - BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute kurtosis."); + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the first four moments."); if constexpr (std::is_integral::value) { double M1 = *first; @@ -197,13 +197,7 @@ auto kurtosis(ForwardIterator first, ForwardIterator last) n += 1; } - double var = M2/(n-1); - if (var == 0) - { - return double(0); - } - double kurt = M4/((n-1)*var*var); - return kurt; + return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1)); } else { @@ -223,92 +217,49 @@ auto kurtosis(ForwardIterator first, ForwardIterator last) n += 1; } - Real var = M2/(n-1); - if (var == 0) - { - // Again, the limit is technically undefined, but the interpretation here is clear: - // A constant dataset has no kurtosis. - return Real(0); - } - Real kurt = M4/((n-1)*var*var); - return kurt; + return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1)); } } template -inline auto kurtosis(Container const & v) +inline auto first_four_moments(Container const & v) { - return kurtosis(v.cbegin(), v.cend()); + return first_four_moments(v.cbegin(), v.cend()); } + +// Follows equation 1.6 of: +// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf template -auto excess_kurtosis(ForwardIterator first, ForwardIterator last) +auto kurtosis(ForwardIterator first, ForwardIterator last) { - return kurtosis(first, last) - 3; + auto [M1, M2, M3, M4] = first_four_moments(first, last); + if (M2 == 0) + { + return M2; + } + return M4/(M2*M2); } template -inline auto excess_kurtosis(Container const & v) +inline auto kurtosis(Container const & v) { - return excess_kurtosis(v.cbegin(), v.cend()); + return kurtosis(v.cbegin(), v.cend()); } - -// Follows equation 1.5/1.6 of: -// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf template -auto first_four_moments(ForwardIterator first, ForwardIterator last) +auto excess_kurtosis(ForwardIterator first, ForwardIterator last) { - using Real = typename std::iterator_traits::value_type; - BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the first four moments."); - if constexpr (std::is_integral::value) - { - double M1 = *first; - double M2 = 0; - double M3 = 0; - double M4 = 0; - double n = 2; - for (auto it = std::next(first); it != last; ++it) - { - double delta21 = *it - M1; - double tmp = delta21/n; - M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3); - M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); - M2 = M2 + tmp*(n-1)*delta21; - M1 = M1 + tmp; - n += 1; - } - - return std::make_tuple(M1, M2/(n-1), 
M3/(n-1), M4/(n-1)); - } - else - { - Real M1 = *first; - Real M2 = 0; - Real M3 = 0; - Real M4 = 0; - Real n = 2; - for (auto it = std::next(first); it != last; ++it) - { - Real delta21 = *it - M1; - Real tmp = delta21/n; - M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3); - M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); - M2 = M2 + tmp*(n-1)*delta21; - M1 = M1 + tmp; - n += 1; - } - - return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1)); - } + return kurtosis(first, last) - 3; } template -inline auto first_four_moments(Container const & v) +inline auto excess_kurtosis(Container const & v) { - return first_four_moments(v.cbegin(), v.cend()); + return excess_kurtosis(v.cbegin(), v.cend()); } + template auto median(RandomAccessIterator first, RandomAccessIterator last) { @@ -416,6 +367,5 @@ inline auto median_absolute_deviation(RandomAccessContainer & v, typename Random return median_absolute_deviation(v.begin(), v.end(), center); } - } #endif diff --git a/test/univariate_statistics_test.cpp b/test/univariate_statistics_test.cpp index c247ed013f..dc6b10a19e 100644 --- a/test/univariate_statistics_test.cpp +++ b/test/univariate_statistics_test.cpp @@ -242,13 +242,13 @@ void test_kurtosis() // or generates too many, giving pretty wildly different values of kurtosis on different runs. // However, by kicking up the samples to 1,000,000, I got very close to 6 for the excess kurtosis on every run. // The CI system, however, would die on a million long doubles. + //v3.resize(1000000); //std::exponential_distribution edis(0.1); //for (size_t i = 0; i < v3.size(); ++i) { // v3[i] = edis(gen); //} //excess_kurtosis = boost::math::tools::kurtosis(v3) - 3; //BOOST_TEST(abs(excess_kurtosis - 6.0) < 0.2); - } template From ce7fdbe581a14662d17102b0b31dff855e672a14 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Sat, 12 Jan 2019 12:53:53 -0700 Subject: [PATCH 36/46] Take care of integer overflow. 
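
For the record, the failure mode being guarded against looks roughly like this (an
illustrative sketch, not code from the headers): with an unsigned value type, the naive
elementwise difference wraps modulo 2^N before it can ever be converted to double.

    #include <cstddef>
    #include <iostream>

    int main()
    {
        std::size_t a = 3, b = 250;
        double naive = static_cast<double>(a - b);                      // wrapped: 2^64 - 247 for a 64-bit size_t
        double safe  = static_cast<double>(a) - static_cast<double>(b); // -247, as intended
        std::cout << naive << " " << safe << "\n";
    }

The distance routines below therefore either branch on which operand is larger, or convert
each operand to double before subtracting.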
[CI SKIP] --- include/boost/math/tools/norms.hpp | 148 ++++++--- test/norms_test.cpp | 500 ++++++++++++++++++++++++----- 2 files changed, 529 insertions(+), 119 deletions(-) diff --git a/include/boost/math/tools/norms.hpp b/include/boost/math/tools/norms.hpp index 1533426a34..57e5dff2f1 100644 --- a/include/boost/math/tools/norms.hpp +++ b/include/boost/math/tools/norms.hpp @@ -18,14 +18,13 @@ namespace boost::math::tools { template auto total_variation(ForwardIterator first, ForwardIterator last) { - using Real = typename std::iterator_traits::value_type; + using T = typename std::iterator_traits::value_type; using std::abs; BOOST_ASSERT_MSG(first != last && std::next(first) != last, "At least two samples are required to compute the total variation."); auto it = first; - Real tmp = *it; - - if constexpr (std::is_unsigned::value) + if constexpr (std::is_unsigned::value) { + T tmp = *it; double tv = 0; while (++it != last) { @@ -41,9 +40,22 @@ auto total_variation(ForwardIterator first, ForwardIterator last) } return tv; } + else if constexpr (std::is_integral::value) + { + double tv = 0; + double tmp = *it; + while(++it != last) + { + double tmp2 = *it; + tv += abs(tmp2 - tmp); + tmp = *it; + } + return tv; + } else { - Real tv = 0; + T tmp = *it; + T tv = 0; while (++it != last) { tv += abs(*it - tmp); @@ -64,14 +76,18 @@ template auto sup_norm(ForwardIterator first, ForwardIterator last) { BOOST_ASSERT_MSG(first != last, "At least one value is required to compute the sup norm."); - using RealOrComplex = typename std::iterator_traits::value_type; + using T = typename std::iterator_traits::value_type; using std::abs; - if constexpr (boost::is_complex::value || - boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + if constexpr (boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) { - auto it = std::max_element(first, last, [](RealOrComplex a, RealOrComplex b) { return abs(b) > abs(a); }); + auto it = std::max_element(first, last, [](T a, T b) { return abs(b) > abs(a); }); return abs(*it); } + else if constexpr (std::is_unsigned::value) + { + return *std::max_element(first, last); + } else { auto pair = std::minmax_element(first, last); @@ -95,13 +111,37 @@ inline auto sup_norm(Container const & v) template auto l1_norm(ForwardIterator first, ForwardIterator last) { + using T = typename std::iterator_traits::value_type; using std::abs; - decltype(abs(*first)) l1 = 0; - for (auto it = first; it != last; ++it) + if constexpr (std::is_unsigned::value) { - l1 += abs(*first); + double l1 = 0; + for (auto it = first; it != last; ++it) + { + l1 += *it; + } + return l1; + } + else if constexpr (std::is_integral::value) + { + double l1 = 0; + for (auto it = first; it != last; ++it) + { + double tmp = *it; + l1 += abs(tmp); + } + return l1; } - return l1; + else + { + decltype(abs(*first)) l1 = 0; + for (auto it = first; it != last; ++it) + { + l1 += abs(*it); + } + return l1; + } + } template @@ -114,15 +154,15 @@ inline auto l1_norm(Container const & v) template auto l2_norm(ForwardIterator first, ForwardIterator last) { - using RealOrComplex = typename std::iterator_traits::value_type; + using T = typename std::iterator_traits::value_type; using std::abs; using std::norm; using std::sqrt; using std::is_floating_point; - if constexpr (boost::is_complex::value || - boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + if constexpr 
(boost::is_complex::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) { - typedef typename RealOrComplex::value_type Real; + typedef typename T::value_type Real; Real l2 = 0; for (auto it = first; it != last; ++it) { @@ -141,22 +181,22 @@ auto l2_norm(ForwardIterator first, ForwardIterator last) } return result; } - else if constexpr (is_floating_point::value || - boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + else if constexpr (is_floating_point::value || + boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) { - RealOrComplex l2 = 0; + T l2 = 0; for (auto it = first; it != last; ++it) { l2 += (*it)*(*it); } - RealOrComplex result = sqrt(l2); + T result = sqrt(l2); if (!isfinite(result)) { - RealOrComplex a = sup_norm(first, last); + T a = sup_norm(first, last); l2 = 0; for (auto it = first; it != last; ++it) { - RealOrComplex tmp = *it/a; + T tmp = *it/a; l2 += tmp*tmp; } return a*sqrt(l2); @@ -286,9 +326,9 @@ auto lp_norm(ForwardIterator first, ForwardIterator last, unsigned p) for (auto it = first; it != last; ++it) { - lp += pow(abs(*it), p); + double tmp = *it; + lp += pow(abs(tmp), p); } - double result = pow(lp, 1.0/double(p)); if (!isfinite(result)) { @@ -296,7 +336,8 @@ auto lp_norm(ForwardIterator first, ForwardIterator last, unsigned p) lp = 0; for (auto it = first; it != last; ++it) { - lp += pow(abs(*it)/a, p); + double tmp = *it; + lp += pow(abs(tmp)/a, p); } result = a*pow(lp, double(1)/double(p)); } @@ -319,14 +360,15 @@ auto lp_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator using std::is_floating_point; using std::isfinite; using RealOrComplex = typename std::iterator_traits::value_type; + auto it1 = first1; + auto it2 = first2; + if constexpr (boost::is_complex::value || boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) { using Real = typename RealOrComplex::value_type; using std::norm; Real dist = 0; - auto it1 = first1; - auto it2 = first2; while(it1 != last1) { auto tmp = *it1++ - *it2++; @@ -338,27 +380,24 @@ auto lp_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) { RealOrComplex dist = 0; - auto it1 = first1; - auto it2 = first2; while(it1 != last1) { auto tmp = *it1++ - *it2++; dist += pow(abs(tmp), p); } - return pow(dist, RealOrComplex(1)/RealOrComplex(p)); } else { - BOOST_ASSERT_MSG(p >= 0, "For p < 0, the lp norm is not a norm"); double dist = 0; - - auto it1 = first1; - auto it2 = first2; while(it1 != last1) { - double tmp = *it1++ - *it2++; - dist += pow(abs(tmp), p); + double tmp1 = *it1++; + double tmp2 = *it2++; + // Naively you'd expect the integer subtraction to be faster, + // but this can overflow or wraparound: + //double tmp = *it1++ - *it2++; + dist += pow(abs(tmp1 - tmp2), p); } return pow(dist, 1.0/double(p)); } @@ -400,11 +439,8 @@ auto l1_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator } return sum; } - else + else if constexpr (std::is_unsigned::value) { - // Why choose double precision? - // First, consistency: l2 and lp distance cannot be returned as floating point types. - // Second, if the type is a small integer type (like uint8_t), then the result will overflow. 
double sum = 0; while(it1 != last1) { @@ -421,6 +457,22 @@ auto l1_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator } return sum; } + else if constexpr (std::is_integral::value) + { + double sum = 0; + while(it1 != last1) + { + double x1 = *it1++; + double x2 = *it2++; + sum += abs(x1-x2); + } + return sum; + } + else + { + BOOST_ASSERT_MSG(false, "Could not recognize type."); + } + } template @@ -464,7 +516,7 @@ auto l2_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator } return sqrt(sum); } - else // integral values: + else if constexpr (std::is_unsigned::value) { double sum = 0; while(it1 != last1) @@ -484,6 +536,18 @@ auto l2_distance(ForwardIterator first1, ForwardIterator last1, ForwardIterator } return sqrt(sum); } + else + { + double sum = 0; + while(it1 != last1) + { + double x1 = *it1++; + double x2 = *it2++; + double tmp = x1-x2; + sum += tmp*tmp; + } + return sqrt(sum); + } } template diff --git a/test/norms_test.cpp b/test/norms_test.cpp index bffe2b6c22..930d96ea19 100644 --- a/test/norms_test.cpp +++ b/test/norms_test.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,15 @@ using std::pow; using std::sqrt; using boost::multiprecision::cpp_bin_float_50; using boost::multiprecision::cpp_complex_50; +using boost::math::tools::lp_norm; +using boost::math::tools::l1_norm; +using boost::math::tools::l2_norm; +using boost::math::tools::sup_norm; +using boost::math::tools::lp_distance; +using boost::math::tools::l1_distance; +using boost::math::tools::l2_distance; +using boost::math::tools::sup_distance; +using boost::math::tools::total_variation; /* * Test checklist: @@ -32,6 +42,75 @@ using boost::multiprecision::cpp_complex_50; * 5) Does it work with complex data if complex data is sensible? */ +// To stress test, set global_seed = 0, global_size = huge. +static const constexpr size_t global_seed = 834; +static const constexpr size_t global_size = 64; + +template +std::vector generate_random_vector(size_t size, size_t seed) +{ + if (seed == 0) + { + std::random_device rd; + seed = rd(); + } + std::vector v(size); + + std::mt19937 gen(seed); + + if constexpr (std::is_floating_point::value) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = dis(gen); + } + return v; + } + else if constexpr (std::is_integral::value) + { + // Rescaling by larger than 2 is UB! 
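+        // (The scale-invariance checks multiply each sample by 2, so drawing from [lowest()/2, max()/2] keeps that product representable.)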
+ std::uniform_int_distribution dis(std::numeric_limits::lowest()/2, std::numeric_limits::max()/2); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = dis(gen); + } + return v; + } + else if constexpr (boost::is_complex::value) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = {dis(gen), dis(gen)}; + } + return v; + } + else if constexpr (boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = {dis(gen), dis(gen)}; + } + return v; + } + else if constexpr (boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = dis(gen); + } + return v; + } + else + { + BOOST_ASSERT_MSG(false, "Could not identify type for random vector generation."); + return v; + } +} + template void test_lp() @@ -39,18 +118,18 @@ void test_lp() Real tol = 50*std::numeric_limits::epsilon(); std::array u{1,0,0}; - Real l3 = boost::math::tools::lp_norm(u.begin(), u.end(), 3); + Real l3 = lp_norm(u.begin(), u.end(), 3); BOOST_TEST(abs(l3 - 1) < tol); u[0] = -8; - l3 = boost::math::tools::lp_norm(u.cbegin(), u.cend(), 3); + l3 = lp_norm(u.cbegin(), u.cend(), 3); BOOST_TEST(abs(l3 - 8) < tol); std::vector v(500); for (size_t i = 0; i < v.size(); ++i) { v[i] = 7; } - Real l8 = boost::math::tools::lp_norm(v, 8); + Real l8 = lp_norm(v, 8); Real expected = 7*pow(v.size(), static_cast(1)/static_cast(8)); BOOST_TEST(abs(l8 - expected) < tol*abs(expected)); @@ -62,9 +141,19 @@ void test_lp() { w[i] = bignum; } - Real l20 = boost::math::tools::lp_norm(w.cbegin(), w.cend(), 4); + Real l20 = lp_norm(w.cbegin(), w.cend(), 4); expected = bignum*pow(w.size(), static_cast(1)/static_cast(4)); BOOST_TEST(abs(l20 - expected) < tol*expected); + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real l7 = scale*lp_norm(v, 7); + for (auto & x : v) + { + x *= -scale; + } + Real l7_ = lp_norm(v, 7); + BOOST_TEST(abs(l7_ - l7) < tol*l7); } @@ -72,45 +161,72 @@ template void test_complex_lp() { typedef typename Complex::value_type Real; - Real tol = std::numeric_limits::epsilon(); + Real tol = 50*std::numeric_limits::epsilon(); std::vector v{{1,0}, {0,0}, {0,0}}; - Real l3 = boost::math::tools::lp_norm(v.cbegin(), v.cend(), 3); + Real l3 = lp_norm(v.cbegin(), v.cend(), 3); BOOST_TEST(abs(l3 - 1) < tol); - l3 = boost::math::tools::lp_norm(v, 3); + l3 = lp_norm(v, 3); BOOST_TEST(abs(l3 - 1) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real l7 = scale*lp_norm(v, 7); + for (auto & x : v) + { + x *= -scale; + } + Real l7_ = lp_norm(v, 7); + BOOST_TEST(abs(l7_ - l7) < tol*l7); } template void test_integer_lp() { - double tol = 50*std::numeric_limits::epsilon(); + double tol = 100*std::numeric_limits::epsilon(); std::array u{1,0,0}; - double l3 = boost::math::tools::lp_norm(u.begin(), u.end(), 3); + double l3 = lp_norm(u.begin(), u.end(), 3); BOOST_TEST(abs(l3 - 1) < tol); + + auto v = generate_random_vector(global_size, global_seed); + Z scale = 2; + double l7 = scale*lp_norm(v, 7); + for (auto & x : v) + { + x *= scale; + } + double l7_ = lp_norm(v, 7); + BOOST_TEST(abs(l7_ - l7) < tol*l7); } template void test_lp_distance() { - Real tol = 50*std::numeric_limits::epsilon(); + Real tol = 100*std::numeric_limits::epsilon(); std::vector u{1,0,0}; std::vector v{0,0,0}; - Real dist = 
boost::math::tools::lp_distance(u,u, 3); + Real dist = lp_distance(u,u, 3); BOOST_TEST(abs(dist) < tol); - dist = boost::math::tools::lp_distance(u,v, 3); + dist = lp_distance(u,v, 3); BOOST_TEST(abs(dist - 1) < tol); + + v = generate_random_vector(global_size, global_seed); + u = generate_random_vector(global_size, global_seed+1); + Real dist1 = lp_distance(u, v, 7); + Real dist2 = lp_distance(v, u, 7); + + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); } template void test_complex_lp_distance() { using Real = typename Complex::value_type; - Real tol = 50*std::numeric_limits::epsilon(); + Real tol = 100*std::numeric_limits::epsilon(); std::vector u{{1,0},{0,0},{0,0}}; std::vector v{{0,0},{0,0},{0,0}}; @@ -120,21 +236,51 @@ void test_complex_lp_distance() dist = boost::math::tools::lp_distance(u,v, 3); BOOST_TEST(abs(dist - 1) < tol); + + v = generate_random_vector(global_size, global_seed); + u = generate_random_vector(global_size, global_seed + 1); + Real dist1 = lp_distance(u, v, 7); + Real dist2 = lp_distance(v, u, 7); + + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); +} + +template +void test_integer_lp_distance() +{ + double tol = 100*std::numeric_limits::epsilon(); + + std::array u{1,0,0}; + std::array w{0,0,0}; + double l3 = lp_distance(u, w, 3); + BOOST_TEST(abs(l3 - 1) < tol); + + auto v = generate_random_vector(global_size, global_seed); + Z scale = 2; + for (auto & x : v) + { + x *= scale; + } + auto s = generate_random_vector(global_size, global_seed + 1); + double dist1 = lp_distance(v, s, 7); + double dist2 = lp_distance(s, v, 7); + BOOST_TEST(abs(dist1 - dist2) < tol*dist2); } template void test_integer_total_variation() { + double eps = std::numeric_limits::epsilon(); std::vector v{1,1}; double tv = boost::math::tools::total_variation(v); - BOOST_TEST_EQ(tv,0); + BOOST_TEST_EQ(tv, 0); v[1] = 2; tv = boost::math::tools::total_variation(v.begin(), v.end()); - BOOST_TEST_EQ(tv,1); + BOOST_TEST_EQ(tv, 1); - v.resize(50); + v.resize(16); for (size_t i = 0; i < v.size(); ++i) { v[i] = i; } @@ -142,23 +288,32 @@ void test_integer_total_variation() tv = boost::math::tools::total_variation(v); BOOST_TEST_EQ(tv, v.size() -1); - for (size_t i = 0; i < v.size(); ++i) { + for (size_t i = 0; i < v.size(); ++i) + { v[i] = i*i; } tv = boost::math::tools::total_variation(v); - BOOST_TEST_EQ(tv, (v.size() -1)*(v.size()-1)); + BOOST_TEST_EQ(tv, (v.size() - 1)*(v.size() - 1)); // Work with std::array? std::array w{1,1}; tv = boost::math::tools::total_variation(w); BOOST_TEST_EQ(tv,0); - // Work with both signed and unsigned integers? 
std::array u{1, 2, 1, 2}; tv = boost::math::tools::total_variation(u); BOOST_TEST_EQ(tv, 3); + v = generate_random_vector(global_size, global_seed); + double tv1 = 2*total_variation(v); + Z scale = 2; + for (auto & x : v) + { + x *= scale; + } + double tv2 = total_variation(v); + BOOST_TEST(abs(tv1 - tv2) < tv1*eps); } template @@ -166,14 +321,14 @@ void test_total_variation() { Real tol = std::numeric_limits::epsilon(); std::vector v{1,1}; - Real tv = boost::math::tools::total_variation(v.begin(), v.end()); + Real tv = total_variation(v.begin(), v.end()); BOOST_TEST(tv >= 0 && abs(tv) < tol); - tv = boost::math::tools::total_variation(v); + tv = total_variation(v); BOOST_TEST(tv >= 0 && abs(tv) < tol); v[1] = 2; - tv = boost::math::tools::total_variation(v.begin(), v.end()); + tv = total_variation(v.begin(), v.end()); BOOST_TEST(abs(tv - 1) < tol); v.resize(50); @@ -181,15 +336,26 @@ void test_total_variation() v[i] = i; } - tv = boost::math::tools::total_variation(v.begin(), v.end()); + tv = total_variation(v.begin(), v.end()); BOOST_TEST(abs(tv - (v.size() -1)) < tol); for (size_t i = 0; i < v.size(); ++i) { v[i] = i*i; } - tv = boost::math::tools::total_variation(v.begin(), v.end()); - BOOST_TEST(abs(tv - (v.size() -1)*(v.size()-1)) < tol); + tv = total_variation(v.begin(), v.end()); + BOOST_TEST(abs(tv - (v.size() - 1)*(v.size() - 1)) < tol); + + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real tv1 = scale*total_variation(v); + for (auto & x : v) + { + x *= -scale; + } + Real tv2 = total_variation(v); + BOOST_TEST(abs(tv1 - tv2) < tol*tv1); } template @@ -207,17 +373,38 @@ void test_sup_norm() std::array w{-2,1,0}; s = boost::math::tools::sup_norm(w); BOOST_TEST(abs(s - 2) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real sup1 = scale*sup_norm(v); + for (auto & x : v) + { + x *= -scale; + } + Real sup2 = sup_norm(v); + BOOST_TEST(abs(sup1 - sup2) < tol*sup1); } template void test_integer_sup_norm() { - std::vector v{-2,1,0}; - Z s = boost::math::tools::sup_norm(v.begin(), v.end()); + double eps = std::numeric_limits::epsilon(); + std::vector v{2,1,0}; + Z s = sup_norm(v.begin(), v.end()); BOOST_TEST_EQ(s, 2); - s = boost::math::tools::sup_norm(v); + s = sup_norm(v); BOOST_TEST_EQ(s,2); + + v = generate_random_vector(global_size, global_seed); + double sup1 = 2*sup_norm(v); + Z scale = 2; + for (auto & x : v) + { + x *= scale; + } + double sup2 = sup_norm(v); + BOOST_TEST(abs(sup1 - sup2) < sup1*eps); } template @@ -226,11 +413,21 @@ void test_complex_sup_norm() typedef typename Complex::value_type Real; Real tol = std::numeric_limits::epsilon(); std::vector w{{0,-8}, {1,1}, {3,2}}; - Real s = boost::math::tools::sup_norm(w.cbegin(), w.cend()); + Real s = sup_norm(w.cbegin(), w.cend()); BOOST_TEST(abs(s-8) < tol); - s = boost::math::tools::sup_norm(w); + s = sup_norm(w); BOOST_TEST(abs(s-8) < tol); + + auto v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real sup1 = scale*sup_norm(v); + for (auto & x : v) + { + x *= -scale; + } + Real sup2 = sup_norm(v); + BOOST_TEST(abs(sup1 - sup2) < tol*sup1); } template @@ -280,23 +477,52 @@ void test_l1_norm() { Real tol = std::numeric_limits::epsilon(); std::vector v{1,1,1}; - Real l1 = boost::math::tools::l1_norm(v.begin(), v.end()); + Real l1 = l1_norm(v.begin(), v.end()); BOOST_TEST(abs(l1 - 3) < tol); - l1 = boost::math::tools::l1_norm(v); + l1 = l1_norm(v); BOOST_TEST(abs(l1 - 3) < tol); std::array w{1,1,1}; - l1 = 
boost::math::tools::l1_norm(w); + l1 = l1_norm(w); BOOST_TEST(abs(l1 - 3) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real l1_1 = scale*l1_norm(v); + for (auto & x : v) + { + x *= -scale; + } + Real l1_2 = l1_norm(v); + BOOST_TEST(abs(l1_1 - l1_2) < tol*l1_1); } template void test_integer_l1_norm() { + double eps = std::numeric_limits::epsilon(); std::vector v{1,1,1}; Z l1 = boost::math::tools::l1_norm(v.begin(), v.end()); BOOST_TEST_EQ(l1, 3); + + v = generate_random_vector(global_size, global_seed); + double l1_1 = 2*l1_norm(v); + Z scale = 2; + for (auto & x : v) + { + x *= scale; + } + double l1_2 = l1_norm(v); + BOOST_TEST(l1_1 > 0); + BOOST_TEST(l1_2 > 0); + if (abs(l1_1 - l1_2) > 2*l1_1*eps) + { + std::cout << std::setprecision(std::numeric_limits::digits10); + std::cout << "L1_1 = " << l1_1 << "\n"; + std::cout << "L1_2 = " << l1_2 << "\n"; + BOOST_TEST(abs(l1_1 - l1_2) < 2*l1_1*eps); + } } template @@ -305,11 +531,21 @@ void test_complex_l1_norm() typedef typename Complex::value_type Real; Real tol = std::numeric_limits::epsilon(); std::vector v{{1,0}, {0,1},{0,-1}}; - Real l1 = boost::math::tools::l1_norm(v.begin(), v.end()); + Real l1 = l1_norm(v.begin(), v.end()); BOOST_TEST(abs(l1 - 3) < tol); - l1 = boost::math::tools::l1_norm(v); + l1 = l1_norm(v); BOOST_TEST(abs(l1 - 3) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real l1_1 = scale*l1_norm(v); + for (auto & x : v) + { + x *= -scale; + } + Real l1_2 = l1_norm(v); + BOOST_TEST(abs(l1_1 - l1_2) < tol*l1_1); } template @@ -326,6 +562,12 @@ void test_l1_distance() l1 = boost::math::tools::l1_distance(v, w); BOOST_TEST(abs(l1 - 3) < tol); + + v = generate_random_vector(global_size, global_seed); + w = generate_random_vector(global_size, global_seed+1); + Real dist1 = l1_distance(v, w); + Real dist2 = l1_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); } template @@ -342,6 +584,12 @@ void test_integer_l1_distance() l1 = boost::math::tools::l1_distance(v, w); BOOST_TEST(abs(l1 - 3) < tol); + + v = generate_random_vector(global_size, global_seed); + w = generate_random_vector(global_size, global_seed + 1); + double dist1 = l1_distance(v, w); + double dist2 = l1_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); } template @@ -356,6 +604,12 @@ void test_complex_l1_distance() std::vector w{{2,0}, {0,1},{0,-1}}; l1 = boost::math::tools::l1_distance(v.cbegin(), v.cend(), w.cbegin()); BOOST_TEST(abs(l1 - 1) < tol); + + v = generate_random_vector(global_size, global_seed); + w = generate_random_vector(global_size, global_seed + 1); + Real dist1 = l1_distance(v, w); + Real dist2 = l1_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); } @@ -382,15 +636,39 @@ void test_l2_norm() v[3] = 0; l2 = boost::math::tools::l2_norm(v.begin(), v.end()); BOOST_TEST(abs(l2 - bignum) < tol*l2); + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real l2_1 = scale*l2_norm(v); + for (auto & x : v) + { + x *= -scale; + } + Real l2_2 = l2_norm(v); + BOOST_TEST(l2_1 > 0); + BOOST_TEST(l2_2 > 0); + BOOST_TEST(abs(l2_1 - l2_2) < tol*l2_1); } template void test_integer_l2_norm() { - double tol = std::numeric_limits::epsilon(); + double tol = 100*std::numeric_limits::epsilon(); std::vector v{1,1,1,1}; double l2 = boost::math::tools::l2_norm(v.begin(), v.end()); BOOST_TEST(abs(l2 - 2) < tol); + + v = generate_random_vector(global_size, global_seed); + Z scale = 2; + double l2_1 = scale*l2_norm(v); + for (auto & x : v) + { + 
x *= scale; + } + double l2_2 = l2_norm(v); + BOOST_TEST(l2_1 > 0); + BOOST_TEST(l2_2 > 0); + BOOST_TEST(abs(l2_1 - l2_2) < tol*l2_1); } template @@ -404,6 +682,16 @@ void test_complex_l2_norm() l2 = boost::math::tools::l2_norm(v); BOOST_TEST(abs(l2 - 2) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 8; + Real l2_1 = scale*l2_norm(v); + for (auto & x : v) + { + x *= -scale; + } + Real l2_2 = l2_norm(v); + BOOST_TEST(abs(l2_1 - l2_2) < tol*l2_1); } template @@ -413,6 +701,12 @@ void test_l2_distance() std::vector v{1,1,1,1}; Real l2 = boost::math::tools::l2_distance(v, v); BOOST_TEST(abs(l2) < tol); + + v = generate_random_vector(global_size, global_seed); + auto w = generate_random_vector(global_size, global_seed + 1); + Real dist1 = l2_distance(v, w); + Real dist2 = l2_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); } @@ -423,6 +717,12 @@ void test_integer_l2_distance() std::vector v{1,1,1,1}; double l2 = boost::math::tools::l2_distance(v, v); BOOST_TEST(abs(l2) < tol); + + v = generate_random_vector(global_size, global_seed); + auto w = generate_random_vector(global_size, global_seed + 1); + double dist1 = l2_distance(v, w); + double dist2 = l2_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); } template @@ -433,6 +733,12 @@ void test_complex_l2_distance() std::vector v{{1,0}, {0,1},{0,-1}, {1,0}}; Real l2 = boost::math::tools::l2_distance(v, v); BOOST_TEST(abs(l2) < tol); + + v = generate_random_vector(global_size, global_seed); + auto w = generate_random_vector(global_size, global_seed + 1); + Real dist1 = l2_distance(v, w); + Real dist2 = l2_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); } template @@ -445,6 +751,12 @@ void test_sup_distance() BOOST_TEST(abs(sup) < tol); sup = boost::math::tools::sup_distance(v, w); BOOST_TEST(abs(sup -1) < tol); + + v = generate_random_vector(global_size, global_seed); + w = generate_random_vector(global_size, global_seed + 1); + Real dist1 = sup_distance(v, w); + Real dist2 = sup_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); } @@ -459,6 +771,12 @@ void test_integer_sup_distance() sup = boost::math::tools::sup_distance(v, w); BOOST_TEST(abs(sup -1) < tol); + + v = generate_random_vector(global_size, global_seed); + w = generate_random_vector(global_size, global_seed + 1); + double dist1 = sup_distance(v, w); + double dist2 = sup_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); } template @@ -467,51 +785,20 @@ void test_complex_sup_distance() typedef typename Complex::value_type Real; Real tol = 100*std::numeric_limits::epsilon(); std::vector v{{1,0}, {0,1},{0,-1}, {1,0}}; - Real l2 = boost::math::tools::sup_distance(v, v); - BOOST_TEST(abs(l2) < tol); -} + Real sup = boost::math::tools::sup_distance(v, v); + BOOST_TEST(abs(sup) < tol); + v = generate_random_vector(global_size, global_seed); + auto w = generate_random_vector(global_size, global_seed + 1); + Real dist1 = sup_distance(v, w); + Real dist2 = sup_distance(w, v); + BOOST_TEST(abs(dist1 - dist2) < tol*dist1); +} int main() { - test_lp(); - test_lp(); - test_lp(); - test_lp(); - - test_complex_lp>(); - test_complex_lp>(); - test_complex_lp>(); - test_complex_lp(); - - test_integer_lp(); - - test_lp_distance(); - test_lp_distance(); - - test_complex_lp_distance>(); - test_complex_lp_distance(); - - test_sup_norm(); - test_sup_norm(); - test_sup_norm(); - test_sup_norm(); - - test_integer_sup_norm(); - - test_complex_sup_norm>(); - test_complex_sup_norm>(); - test_complex_sup_norm>(); - 
test_complex_sup_norm(); - - test_sup_distance(); - test_sup_distance(); - - test_integer_sup_distance(); - - test_complex_sup_distance>(); - test_complex_sup_distance(); - + test_l0_pseudo_norm(); + test_l0_pseudo_norm(); test_l0_pseudo_norm(); test_l0_pseudo_norm(); test_l0_pseudo_norm(); @@ -524,6 +811,8 @@ int main() test_complex_l0_pseudo_norm(); test_hamming_distance(); + test_hamming_distance(); + test_hamming_distance(); test_l1_norm(); test_l1_norm(); @@ -531,6 +820,8 @@ int main() test_l1_norm(); test_integer_l1_norm(); + test_integer_l1_norm(); + test_integer_l1_norm(); test_complex_l1_norm>(); test_complex_l1_norm>(); @@ -541,6 +832,8 @@ int main() test_l1_distance(); test_integer_l1_distance(); + test_integer_l1_distance(); + test_integer_l1_distance(); test_complex_l1_distance>(); test_complex_l1_distance(); @@ -556,15 +849,67 @@ int main() test_l2_norm(); test_integer_l2_norm(); + test_integer_l2_norm(); + test_integer_l2_norm(); test_l2_distance(); test_l2_distance(); test_integer_l2_distance(); + test_integer_l2_distance(); + test_integer_l2_distance(); test_complex_l2_distance>(); test_complex_l2_distance(); + test_lp(); + test_lp(); + test_lp(); + test_lp(); + + test_complex_lp>(); + test_complex_lp>(); + test_complex_lp>(); + test_complex_lp(); + + test_integer_lp(); + test_integer_lp(); + test_integer_lp(); + + test_lp_distance(); + test_lp_distance(); + + test_complex_lp_distance>(); + test_complex_lp_distance(); + + test_integer_lp_distance(); + test_integer_lp_distance(); + test_integer_lp_distance(); + + test_sup_norm(); + test_sup_norm(); + test_sup_norm(); + test_sup_norm(); + + test_integer_sup_norm(); + test_integer_sup_norm(); + test_integer_sup_norm(); + + test_complex_sup_norm>(); + test_complex_sup_norm>(); + test_complex_sup_norm>(); + test_complex_sup_norm(); + + test_sup_distance(); + test_sup_distance(); + + test_integer_sup_distance(); + test_integer_sup_distance(); + test_integer_sup_distance(); + + test_complex_sup_distance>(); + test_complex_sup_distance(); + test_total_variation(); test_total_variation(); test_total_variation(); @@ -572,6 +917,7 @@ int main() test_integer_total_variation(); test_integer_total_variation(); + test_integer_total_variation(); return boost::report_errors(); } From f3af883c6db7d1f84c1af1b202ee2404c906a043 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Sat, 12 Jan 2019 14:57:16 -0700 Subject: [PATCH 37/46] Patch up univariate statistics for some integer types. 
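
Illustrative usage, paraphrasing the updated tests (the element type is written out here
for clarity; the tests themselves are templated on an integer type Z):

    std::vector<int> v{1, 2, 3, 4, 5};
    double mu = boost::math::tools::mean(v);   // 3.0: integer input, double result

The same promotion to double is intended for variance, skewness, kurtosis and the Gini
coefficient, so small integer types neither truncate nor overflow inside the accumulators.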
[CI SKIP] --- .../math/tools/univariate_statistics.hpp | 50 +++-- test/univariate_statistics_test.cpp | 206 +++++++++++++++++- 2 files changed, 236 insertions(+), 20 deletions(-) diff --git a/include/boost/math/tools/univariate_statistics.hpp b/include/boost/math/tools/univariate_statistics.hpp index 0886956df8..60e4c15c7c 100644 --- a/include/boost/math/tools/univariate_statistics.hpp +++ b/include/boost/math/tools/univariate_statistics.hpp @@ -294,24 +294,46 @@ auto gini_coefficient(RandomAccessIterator first, RandomAccessIterator last) BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); std::sort(first, last); - - Real i = 1; - Real num = 0; - Real denom = 0; - for (auto it = first; it != last; ++it) + if constexpr (std::is_integral::value) { - num += *it*i; - denom += *it; - ++i; - } + double i = 1; + double num = 0; + double denom = 0; + for (auto it = first; it != last; ++it) + { + num += *it*i; + denom += *it; + ++i; + } - // If the l1 norm is zero, all elements are zero, so every element is the same. - if (denom == 0) - { - return Real(0); + // If the l1 norm is zero, all elements are zero, so every element is the same. + if (denom == 0) + { + return double(0); + } + + return ((2*num)/denom - i)/(i-1); } + else + { + Real i = 1; + Real num = 0; + Real denom = 0; + for (auto it = first; it != last; ++it) + { + num += *it*i; + denom += *it; + ++i; + } - return ((2*num)/denom - i)/(i-1); + // If the l1 norm is zero, all elements are zero, so every element is the same. + if (denom == 0) + { + return Real(0); + } + + return ((2*num)/denom - i)/(i-1); + } } template diff --git a/test/univariate_statistics_test.cpp b/test/univariate_statistics_test.cpp index dc6b10a19e..6bcbc5fe05 100644 --- a/test/univariate_statistics_test.cpp +++ b/test/univariate_statistics_test.cpp @@ -29,10 +29,80 @@ using boost::multiprecision::cpp_complex_50; * 5) Does it work with complex data if complex data is sensible? */ + // To stress test, set global_seed = 0, global_size = huge. + static const constexpr size_t global_seed = 0; + static const constexpr size_t global_size = 128; + +template +std::vector generate_random_vector(size_t size, size_t seed) +{ + if (seed == 0) + { + std::random_device rd; + seed = rd(); + } + std::vector v(size); + + std::mt19937 gen(seed); + + if constexpr (std::is_floating_point::value) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = dis(gen); + } + return v; + } + else if constexpr (std::is_integral::value) + { + // Rescaling by larger than 2 is UB! 
+ std::uniform_int_distribution dis(std::numeric_limits::lowest()/2, std::numeric_limits::max()/2); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = dis(gen); + } + return v; + } + else if constexpr (boost::is_complex::value) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = {dis(gen), dis(gen)}; + } + return v; + } + else if constexpr (boost::multiprecision::number_category::value == boost::multiprecision::number_kind_complex) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = {dis(gen), dis(gen)}; + } + return v; + } + else if constexpr (boost::multiprecision::number_category::value == boost::multiprecision::number_kind_floating_point) + { + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = dis(gen); + } + return v; + } + else + { + BOOST_ASSERT_MSG(false, "Could not identify type for random vector generation."); + return v; + } +} + + template void test_integer_mean() { - double tol = std::numeric_limits::epsilon(); + double tol = 100*std::numeric_limits::epsilon(); std::vector v{1,2,3,4,5}; double mu = boost::math::tools::mean(v); BOOST_TEST(abs(mu - 3) < tol); @@ -41,6 +111,17 @@ void test_integer_mean() std::array w{1,2,3,4,5}; mu = boost::math::tools::mean(w); BOOST_TEST(abs(mu - 3) < tol); + + v = generate_random_vector(global_size, global_seed); + Z scale = 2; + + double m1 = scale*boost::math::tools::mean(v); + for (auto & x : v) + { + x *= scale; + } + double m2 = boost::math::tools::mean(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); } template @@ -82,6 +163,15 @@ void test_mean() mu = boost::math::tools::mean(w.cbegin(), w.cend()); BOOST_TEST(abs(mu - 4) < tol); + v = generate_random_vector(global_size, global_seed); + Real scale = 2; + Real m1 = scale*boost::math::tools::mean(v); + for (auto & x : v) + { + x *= scale; + } + Real m2 = boost::math::tools::mean(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); } template @@ -128,6 +218,16 @@ void test_variance() std::forward_list l{0,1,0,1,0,1,0,1}; sigma_sq = boost::math::tools::variance(l.begin(), l.end()); BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 2; + Real m1 = scale*scale*boost::math::tools::variance(v); + for (auto & x : v) + { + x *= scale; + } + Real m2 = boost::math::tools::variance(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); } template @@ -141,6 +241,16 @@ void test_integer_variance() std::forward_list l{0,1,0,1,0,1,0,1}; sigma_sq = boost::math::tools::variance(l.begin(), l.end()); BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol); + + v = generate_random_vector(global_size, global_seed); + Z scale = 2; + double m1 = scale*scale*boost::math::tools::variance(v); + for (auto & x : v) + { + x *= scale; + } + double m2 = boost::math::tools::variance(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); } template @@ -165,6 +275,17 @@ void test_integer_skewness() skew = boost::math::tools::skewness(v); BOOST_TEST(abs(skew - 3.0/2.0) < tol); + + v = generate_random_vector(global_size, global_seed); + Z scale = 2; + double m1 = boost::math::tools::skewness(v); + for (auto & x : v) + { + x *= scale; + } + double m2 = boost::math::tools::skewness(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); + } template @@ -192,6 +313,16 @@ void test_skewness() std::forward_list w2{0,0,0,0,5}; skew = boost::math::tools::skewness(w2); BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol); + + v = generate_random_vector(global_size, global_seed); + Real scale = 2; + 
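+    // Skewness is invariant under scaling by a positive constant, so multiplying every sample by 2 should leave it unchanged up to rounding.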
Real m1 = boost::math::tools::skewness(v); + for (auto & x : v) + { + x *= scale; + } + Real m2 = boost::math::tools::skewness(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); } template @@ -236,6 +367,15 @@ void test_kurtosis() auto excess_kurtosis = boost::math::tools::excess_kurtosis(v3); BOOST_TEST(abs(excess_kurtosis + 6.0/5.0) < 0.2); + v = generate_random_vector(global_size, global_seed); + Real scale = 2; + Real m1 = boost::math::tools::kurtosis(v); + for (auto & x : v) + { + x *= scale; + } + Real m2 = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); // This test only passes when there are a large number of samples. // Otherwise, the distribution doesn't generate enough outliers to give, @@ -268,6 +408,16 @@ void test_integer_kurtosis() // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2, kurtosis = 13/4 kurt = boost::math::tools::kurtosis(v); BOOST_TEST(abs(kurt - 13.0/4.0) < tol); + + v = generate_random_vector(global_size, global_seed); + Z scale = 2; + double m1 = boost::math::tools::kurtosis(v); + for (auto & x : v) + { + x *= scale; + } + double m2 = boost::math::tools::kurtosis(v); + BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); } template @@ -492,6 +642,42 @@ void test_gini_coefficient() } +template +void test_integer_gini_coefficient() +{ + double tol = std::numeric_limits::epsilon(); + std::vector v{1,0,0}; + double gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + double expected = 2.0/3.0; + BOOST_TEST(abs(gini - expected) < tol); + + gini = boost::math::tools::gini_coefficient(v); + BOOST_TEST(abs(gini - expected) < tol); + + v[0] = 1; + v[1] = 1; + v[2] = 1; + gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); + + v[0] = 0; + v[1] = 0; + v[2] = 0; + gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + BOOST_TEST(abs(gini) < tol); + + std::array w{0,0,0}; + gini = boost::math::tools::gini_coefficient(w); + BOOST_TEST(abs(gini) < tol); + + boost::numeric::ublas::vector w1(3); + w1[0] = 1; + w1[1] = 1; + w1[2] = 1; + gini = boost::math::tools::gini_coefficient(w1); + BOOST_TEST(abs(gini) < tol); +} + int main() { test_mean(); @@ -512,6 +698,7 @@ int main() test_variance(); test_integer_variance(); + test_integer_variance(); test_skewness(); test_skewness(); @@ -519,6 +706,12 @@ int main() test_skewness(); test_integer_skewness(); + test_integer_skewness(); + + test_first_four_moments(); + test_first_four_moments(); + test_first_four_moments(); + test_first_four_moments(); test_kurtosis(); test_kurtosis(); @@ -526,28 +719,29 @@ int main() // Kinda expensive: //test_kurtosis(); - test_first_four_moments(); - test_first_four_moments(); - test_first_four_moments(); - test_first_four_moments(); - test_integer_kurtosis(); + test_integer_kurtosis(); test_median(); test_median(); test_median(); test_median(); + test_median(); test_median_absolute_deviation(); test_median_absolute_deviation(); test_median_absolute_deviation(); test_median_absolute_deviation(); + test_median_absolute_deviation(); test_gini_coefficient(); test_gini_coefficient(); test_gini_coefficient(); test_gini_coefficient(); + test_integer_gini_coefficient(); + test_integer_gini_coefficient(); + test_sample_gini_coefficient(); test_sample_gini_coefficient(); test_sample_gini_coefficient(); From 75e7832a54315bd42420351c89360ed61057e5b8 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Wed, 16 Jan 2019 11:14:53 -0700 Subject: [PATCH 38/46] Add unsigned integer tests for Hoyer sparsity. 
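
For reference, the quantity these tests exercise is

    H(v) = (sqrt(n) - ||v||_1 / ||v||_2) / (sqrt(n) - 1),

so for the unsigned vector {1, 0, 0} (n = 3, ||v||_1 = ||v||_2 = 1) the expected value is
exactly 1, while for {1, 1, 1} it is exactly 0.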
--- doc/vector_functionals/signal_statistics.qbk | 2 +- .../boost/math/tools/signal_statistics.hpp | 51 ++++++++++++------- test/signal_statistics_test.cpp | 2 + 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/doc/vector_functionals/signal_statistics.qbk b/doc/vector_functionals/signal_statistics.qbk index b7a483aa51..5ef0e1db2e 100644 --- a/doc/vector_functionals/signal_statistics.qbk +++ b/doc/vector_functionals/signal_statistics.qbk @@ -61,7 +61,7 @@ The file `boost/math/tools/signal_statistics.hpp` is a set of facilities for com Our examples use `std::vector` to hold the data, but this not required. In general, you can store your data in an Eigen array, and Armadillo vector, `std::array`, and for many of the routines, a `std::forward_list`. These routines are usable in float, double, long double, and Boost.Multiprecision precision, as well as their complex extensions whenever the computation is well-defined. -For certain operations (total variation, for example) integer inputs are supported. + [heading Absolute Gini Coefficient] diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index d2fbace685..021d280705 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -72,30 +72,47 @@ inline auto sample_absolute_gini_coefficient(RandomAccessContainer & v) template auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) { - using RealIntOrComplex = typename std::iterator_traits::value_type; + using T = typename std::iterator_traits::value_type; using std::abs; using std::sqrt; BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Hoyer sparsity requires at least two samples."); - decltype(abs(*first)) l1 = 0; - decltype(abs(*first)) l2 = 0; - decltype(abs(*first)) n = 0; - for (auto it = first; it != last; ++it) - { - decltype(abs(*first)) tmp = abs(*it); - l1 += tmp; - l2 += tmp*tmp; - n += 1; - } - if constexpr (std::is_integral::value) + if constexpr (std::is_unsigned::value) { + T l1 = 0; + T l2 = 0; + T n = 0; + for (auto it = first; it != last; ++it) + { + l1 += *it; + l2 += (*it)*(*it); + n += 1; + } + double rootn = sqrt(n); return (rootn - l1/sqrt(l2) )/ (rootn - 1); } - else - { - decltype(abs(*first)) rootn = sqrt(n); - return (rootn - l1/sqrt(l2) )/ (rootn - 1); + else { + decltype(abs(*first)) l1 = 0; + decltype(abs(*first)) l2 = 0; + decltype(abs(*first)) n = 0; + for (auto it = first; it != last; ++it) + { + decltype(abs(*first)) tmp = abs(*it); + l1 += tmp; + l2 += tmp*tmp; + n += 1; + } + if constexpr (std::is_integral::value) + { + double rootn = sqrt(n); + return (rootn - l1/sqrt(l2) )/ (rootn - 1); + } + else + { + decltype(abs(*first)) rootn = sqrt(n); + return (rootn - l1/sqrt(l2) )/ (rootn - 1); + } } } @@ -298,7 +315,7 @@ auto m2m4_snr_estimator(ForwardIterator first, ForwardIterator last, decltype(*f else { BOOST_ASSERT_MSG(false, "The M2M4 estimator has not been implemented for this type."); - return *first; + return std::numeric_limits::quiet_NaN(); } } diff --git a/test/signal_statistics_test.cpp b/test/signal_statistics_test.cpp index 330b629f58..963ab01656 100644 --- a/test/signal_statistics_test.cpp +++ b/test/signal_statistics_test.cpp @@ -304,6 +304,7 @@ int main() test_hoyer_sparsity(); test_integer_hoyer_sparsity(); + test_integer_hoyer_sparsity(); test_complex_hoyer_sparsity>(); test_complex_hoyer_sparsity>(); @@ -316,6 +317,7 @@ int main() test_oracle_snr(); 
test_integer_oracle_snr(); + test_integer_oracle_snr(); test_complex_oracle_snr>(); test_complex_oracle_snr>(); From ba5244bee1f8bb48e5bf45f09c2d689301f06434 Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Thu, 17 Jan 2019 18:54:19 +0000 Subject: [PATCH 39/46] Correct minor doc typo. [CI SKIP] --- doc/vector_functionals/univariate_statistics.qbk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/vector_functionals/univariate_statistics.qbk b/doc/vector_functionals/univariate_statistics.qbk index 92fede661f..e07430530a 100644 --- a/doc/vector_functionals/univariate_statistics.qbk +++ b/doc/vector_functionals/univariate_statistics.qbk @@ -220,7 +220,7 @@ Compute the Gini coefficient of a dataset: gini = boost::math::tools::gini_coefficient(w.begin(), w.end()); // gini = 0, as all elements are now equal. -/Nota bene/: The input data is altered-in particular, it is sorted. Makes a call to `std::sort`, and as such requires random access iterators. +/Nota bene/: The input data is altered: in particular, it is sorted. Makes a call to `std::sort`, and as such requires random access iterators. The sample Gini coefficient lies in the range [0,1], whereas the population Gini coefficient is in the range [0, 1 - 1/ /n/]. From 89304200055a7146309f9c5373be7821d0c2c497 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Thu, 17 Jan 2019 16:49:59 -0700 Subject: [PATCH 40/46] Performance improvements [CI SKIP] --- include/boost/math/tools/univariate_statistics.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/boost/math/tools/univariate_statistics.hpp b/include/boost/math/tools/univariate_statistics.hpp index 60e4c15c7c..f88c8a7248 100644 --- a/include/boost/math/tools/univariate_statistics.hpp +++ b/include/boost/math/tools/univariate_statistics.hpp @@ -75,9 +75,9 @@ auto variance(ForwardIterator first, ForwardIterator last) Real k = 2; for (auto it = std::next(first); it != last; ++it) { - Real tmp = *it - M; - Q = Q + ((k-1)*tmp*tmp)/k; - M = M + tmp/k; + Real tmp = (*it - M)/k; + Q += k*(k-1)*tmp*tmp; + M += tmp; k += 1; } return Q/(k-1); From ff366e85a2cc828a0ba6e98fe52cf7ebb83777b2 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Fri, 18 Jan 2019 10:00:43 -0700 Subject: [PATCH 41/46] Add variance of N-sided die as unit test [CI SKIP] --- .../boost/math/tools/univariate_statistics.hpp | 6 +++--- test/univariate_statistics_test.cpp | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/include/boost/math/tools/univariate_statistics.hpp b/include/boost/math/tools/univariate_statistics.hpp index f88c8a7248..32e3a9bb39 100644 --- a/include/boost/math/tools/univariate_statistics.hpp +++ b/include/boost/math/tools/univariate_statistics.hpp @@ -148,9 +148,9 @@ auto skewness(ForwardIterator first, ForwardIterator last) { Real delta21 = *it - M1; Real tmp = delta21/n; - M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); - M2 = M2 + tmp*(n-1)*delta21; - M1 = M1 + tmp; + M3 += tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 += tmp*(n-1)*delta21; + M1 += tmp; n += 1; } diff --git a/test/univariate_statistics_test.cpp b/test/univariate_statistics_test.cpp index 6bcbc5fe05..4896f52cc4 100644 --- a/test/univariate_statistics_test.cpp +++ b/test/univariate_statistics_test.cpp @@ -228,6 +228,23 @@ void test_variance() } Real m2 = boost::math::tools::variance(v); BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); + + // Wikipedia example for a variance of N sided die: + // https://en.wikipedia.org/wiki/Variance + for (size_t j = 16; j < 2048; j *= 2) + { + 
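+        // For a fair n-sided die: E[X] = (n+1)/2 and E[X^2] = (n+1)(2n+1)/6, so Var = (n^2 - 1)/12.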
v.resize(1024); + Real n = v.size(); + for (size_t i = 0; i < v.size(); ++i) + { + v[i] = i + 1; + } + + sigma_sq = boost::math::tools::variance(v); + + BOOST_TEST(abs(sigma_sq - (n*n-1)/Real(12)) <= tol*sigma_sq); + } + } template From 74de9889f9aaf10d4bfa400f9c66a6ed42ee505f Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Fri, 18 Jan 2019 20:19:27 -0700 Subject: [PATCH 42/46] Performance improvements for Hoyer sparsity [CI SKIP] --- include/boost/math/tools/signal_statistics.hpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index 021d280705..09375e5e36 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -81,7 +81,7 @@ auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) { T l1 = 0; T l2 = 0; - T n = 0; + size_t n = 0; for (auto it = first; it != last; ++it) { l1 += *it; @@ -95,7 +95,9 @@ auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) else { decltype(abs(*first)) l1 = 0; decltype(abs(*first)) l2 = 0; - decltype(abs(*first)) n = 0; + // We wouldn't need to count the elements if it was a random access iterator, + // but our only constraint is that it's a forward iterator. + size_t n = 0; for (auto it = first; it != last; ++it) { decltype(abs(*first)) tmp = abs(*it); @@ -110,7 +112,7 @@ auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) } else { - decltype(abs(*first)) rootn = sqrt(n); + decltype(abs(*first)) rootn = sqrt(static_cast(n)); return (rootn - l1/sqrt(l2) )/ (rootn - 1); } } From 8fd1d3cfae8cb92ee712dbe6d2ba0774d1d8be59 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Sat, 19 Jan 2019 15:00:03 -0700 Subject: [PATCH 43/46] Do not include all of type traits, only include is_complex.hpp. --- include/boost/math/tools/norms.hpp | 2 +- include/boost/math/tools/roots.hpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/boost/math/tools/norms.hpp b/include/boost/math/tools/norms.hpp index 57e5dff2f1..680df32fea 100644 --- a/include/boost/math/tools/norms.hpp +++ b/include/boost/math/tools/norms.hpp @@ -7,7 +7,7 @@ #define BOOST_MATH_TOOLS_NORMS_HPP #include #include -#include +#include #include #include diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index bc6bc6e1a1..0cdf99bcb4 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -10,6 +10,7 @@ #pragma once #endif #include // test for multiprecision types. 
+#include // test for complex types #include #include From dad9010fbe44b2f667e1c3b14da47a64830b6aaa Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Sun, 20 Jan 2019 15:50:24 -0700 Subject: [PATCH 44/46] Do not include boost/type_traits.hpp; narrow the scope and include is_complex.hpp [CI SKIP] --- include/boost/math/tools/signal_statistics.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index 09375e5e36..74f9dfd031 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include From c21ba1ad5589aa42ba1091f8d57b0c61ef902e25 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Sun, 20 Jan 2019 15:56:54 -0700 Subject: [PATCH 45/46] Include is_complex.hpp [CI SKIP] --- include/boost/math/tools/bivariate_statistics.hpp | 1 - include/boost/math/tools/univariate_statistics.hpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/boost/math/tools/bivariate_statistics.hpp b/include/boost/math/tools/bivariate_statistics.hpp index 08d56276c9..20b7500ed3 100644 --- a/include/boost/math/tools/bivariate_statistics.hpp +++ b/include/boost/math/tools/bivariate_statistics.hpp @@ -8,7 +8,6 @@ #include #include -#include #include #include diff --git a/include/boost/math/tools/univariate_statistics.hpp b/include/boost/math/tools/univariate_statistics.hpp index 32e3a9bb39..226fdf46d2 100644 --- a/include/boost/math/tools/univariate_statistics.hpp +++ b/include/boost/math/tools/univariate_statistics.hpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include From e6f97af63f7709c945b086b8a84d3ef886861852 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Mon, 21 Jan 2019 12:40:33 -0700 Subject: [PATCH 46/46] Add comment from Higham about overflow resistance. [CI SKIP] --- include/boost/math/tools/norms.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/boost/math/tools/norms.hpp b/include/boost/math/tools/norms.hpp index 680df32fea..478fe04db2 100644 --- a/include/boost/math/tools/norms.hpp +++ b/include/boost/math/tools/norms.hpp @@ -190,6 +190,11 @@ auto l2_norm(ForwardIterator first, ForwardIterator last) l2 += (*it)*(*it); } T result = sqrt(l2); + // Higham, Accuracy and Stability of Numerical Algorithms, + // Problem 27.5 presents a different algorithm to deal with overflow. + // The algorithm used here takes 3 passes *if* there is overflow. + // Higham's algorithm is 1 pass, but more requires operations than the no oveflow case. + // I'm operating under the assumption that overflow is rare since the dynamic range of floating point numbers is huge. if (!isfinite(result)) { T a = sup_norm(first, last);
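
// A minimal free-standing sketch of the rescaling fallback discussed in the comment above,
// written for a plain std::vector<double> (assumption: the templated header also covers
// complex and multiprecision types, which this sketch ignores). It shows why overflow
// costs three passes: the naive pass, a sup-norm pass, and a rescaled accumulation pass.

#include <algorithm>
#include <cmath>
#include <vector>

double l2_norm_with_rescaling(const std::vector<double>& v)
{
    double sum_sq = 0;
    for (double x : v)
    {
        sum_sq += x*x;                 // pass 1: naive sum of squares
    }
    double result = std::sqrt(sum_sq);
    if (std::isfinite(result))
    {
        return result;                 // common case: no overflow, one pass total
    }
    // Overflow: rescale by the sup norm and accumulate again (passes 2 and 3).
    double a = 0;
    for (double x : v)
    {
        a = std::max(a, std::abs(x));
    }
    double scaled = 0;
    for (double x : v)
    {
        double t = x/a;
        scaled += t*t;
    }
    return a*std::sqrt(scaled);
}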