From 4c54adf387146aa3d207892d114b6eeea879c8e5 Mon Sep 17 00:00:00 2001
From: Tom Veasey
Date: Tue, 19 Oct 2021 13:13:01 +0100
Subject: [PATCH 1/6] Handle divide by zero cases

---
 include/maths/common/CLbfgs.h           | 82 +++++++++++++++++++++++--
 lib/maths/common/unittest/CLbfgsTest.cc | 48 ++++++++++++++-
 2 files changed, 124 insertions(+), 6 deletions(-)

diff --git a/include/maths/common/CLbfgs.h b/include/maths/common/CLbfgs.h
index 7b6cdc350c..897380058b 100644
--- a/include/maths/common/CLbfgs.h
+++ b/include/maths/common/CLbfgs.h
@@ -13,6 +13,9 @@
 #define INCLUDED_ml_maths_common_CLbfgs_h
 
 #include
+#include <maths/common/CPRNG.h>
+#include <maths/common/CSampling.h>
+#include <maths/common/CSolvers.h>
 
 #include
 #include
@@ -48,6 +51,10 @@ class CLbfgs {
     //! The maximum number of iterations to use backtracking.
     static const std::size_t MAXIMUM_BACK_TRACKING_ITERATIONS;
 
+    //! The maximum number of random probes we'll use to look for a direction in which
+    //! the objective decreases if the gradient function returns zero.
+    static const std::size_t MAXIMUM_RANDOM_PROBES;
+
 public:
     explicit CLbfgs(std::size_t rank,
                     double decrease = BACKTRACKING_MIN_DECREASE,
@@ -221,7 +228,8 @@ class CLbfgs {
     }
 
     static bool converged(double fx, double fl, double f0, double eps) {
-        return std::fabs(fx - fl) < eps * std::fabs(fx - f0);
+        // We use less than or equal so that this is true if fx == fl == f0.
+        return std::fabs(fx - fl) <= eps * std::fabs(fx - f0);
     }
 
     template<typename F>
@@ -232,8 +240,8 @@
         // size.
 
         double s{1.0};
-        double fs{f(m_X - s * m_P)};
-        VECTOR xs;
+        VECTOR xs{m_X - m_P};
+        double fs{f(xs)};
 
         for (std::size_t i = 0;
              i < MAXIMUM_BACK_TRACKING_ITERATIONS && fs - m_Fx > this->minimumDecrease(s);
              ++i) {
@@ -243,10 +251,69 @@
             fs = f(xs);
         }
 
+        // This can happen if |s m_P| < epsilon |m_X|.
+        if (las::distance(xs, m_X) == 0.0) {
+            this->fullLineSearch(f, xs, fs);
+        }
+
         m_Fl = m_Fx;
         m_Fx = fs;
 
-        return m_X - s * m_P;
+        return xs;
     }
 
+    template<typename F>
+    void fullLineSearch(const F& f, VECTOR& xs, double& fs) {
+        VECTOR step{las::norm(m_P) == 0.0 ? this->randomStep(f)
+                                          : this->minimumStepSize() * las::norm(m_X) /
+                                                las::norm(m_P) * m_P};
+
+        if (las::norm(step) == 0.0) {
+            xs = m_X;
+            fs = m_Fx;
+            return;
+        }
+
+        // We probe a selection of points along the step direction looking for
+        // a minimum and polish up with a few iterations of Brent's method.
+
+        TDoubleVec probes(MAXIMUM_BACK_TRACKING_ITERATIONS);
+        probes[0] = 1.0;
+        for (std::size_t i = 1; i < MAXIMUM_BACK_TRACKING_ITERATIONS; ++i) {
+            probes[i] = probes[i - 1] / m_StepScale;
+        }
+
+        // We need to force the value of s_ * step into a temporary or else we
+        // hit an issue with Eigen expressions which causes the function values
+        // at all probes to be equal.
+        auto f_ = [&](double s_) {
+            VECTOR step_{s_ * step};
+            return f(m_X - step_);
+        };
+
+        double smin;
+        double fmin;
+        CSolvers::globalMinimize(probes, f_, smin, fmin);
+        xs = m_X - smin * step;
+        fs = f(xs);
+    }
+
+    template<typename F>
+    VECTOR randomStep(const F& f) {
+        for (std::size_t i = 0; i < MAXIMUM_RANDOM_PROBES; ++i) {
+            // Randomly probe looking for a decrease in the function.
+            VECTOR step{las::zero(m_X)};
+            double eps{this->minimumStepSize() * las::norm(m_X)};
+            for (std::size_t j = 0; j < las::dimension(m_X); ++j) {
+                step(j) = CSampling::uniformSample(m_Rng, -eps, eps);
+            }
+            if (f(m_X + step) < f(m_X)) {
+                return -step;
+            }
+        }
+        // We could find a direction in which the function decreases. Returning
+        // zero means we'll exit on the convergence condition.
+        return las::zero(m_X);
+    }
+
     template<typename F>
@@ -305,7 +372,9 @@ class CLbfgs {
     }
 
     double minimumDecrease(double s) const {
-        return m_BacktrackingMinDecrease * s * las::inner(m_Gx, m_P) / las::norm(m_P);
+        double normP{las::norm(m_P)};
+        return m_BacktrackingMinDecrease * s *
+               (normP == 0.0 ? las::norm(m_Gx) : las::inner(m_Gx, m_P) / normP);
     }
 
     double minimumStepSize() const {
@@ -317,6 +386,7 @@ class CLbfgs {
     std::size_t m_Rank;
     double m_StepScale;
     double m_BacktrackingMinDecrease;
+    CPRNG::CXorOShiro128Plus m_Rng;
     bool m_Initial = true;
     double m_F0;
     double m_Fl;
@@ -336,6 +406,8 @@
 template<typename VECTOR>
 const double CLbfgs<VECTOR>::STEP_SCALE{0.3};
 template<typename VECTOR>
 const std::size_t CLbfgs<VECTOR>::MAXIMUM_BACK_TRACKING_ITERATIONS{20};
+template<typename VECTOR>
+const std::size_t CLbfgs<VECTOR>::MAXIMUM_RANDOM_PROBES{10};
 }
 }
 }
diff --git a/lib/maths/common/unittest/CLbfgsTest.cc b/lib/maths/common/unittest/CLbfgsTest.cc
index 8ad3b31be7..4c82f5dc19 100644
--- a/lib/maths/common/unittest/CLbfgsTest.cc
+++ b/lib/maths/common/unittest/CLbfgsTest.cc
@@ -40,7 +40,7 @@ BOOST_AUTO_TEST_CASE(testQuadtratic) {
     }
 
     auto f = [&](const TVector& x) {
-        return x.transpose() * diagonal.asDiagonal() * x;
+        return static_cast<double>(x.transpose() * diagonal.asDiagonal() * x);
     };
     auto g = [&](const TVector& x) { return 2.0 * diagonal.asDiagonal() * x; };
 
@@ -232,4 +232,50 @@ BOOST_AUTO_TEST_CASE(testConstrainedMinimize) {
     BOOST_REQUIRE_CLOSE_ABSOLUTE(0.0, ferror, 1e-5);
 }
 
+BOOST_AUTO_TEST_CASE(testMinimizeWithVerySmallGradient) {
+
+    // We test a function whose gradient is less than epsilon * x0.
+
+    double eps{1e-16};
+
+    TVector xmin{TVector::fromStdVector({1000.0, 1000.0, 1000.0})};
+    TVector x0{TVector::fromStdVector({1200.0, 1200.0, 1200.0})};
+
+    auto f = [&](const TVector& x) {
+        return static_cast<double>(eps * (x - xmin).transpose() * (x - xmin));
+    };
+    auto g = [&](const TVector& x) { return 2.0 * eps * (x - xmin); };
+
+    maths::CLbfgs<TVector> lbfgs{10};
+
+    TVector x;
+    double fx;
+    std::tie(x, fx) = lbfgs.minimize(f, g, x0, 1e-20);
+
+    BOOST_TEST_REQUIRE((xmin - x).norm() < 1e-6);
+}
+
+BOOST_AUTO_TEST_CASE(testMinimizeWithInitialZeroGradient) {
+
+    // Test random probing by forcing the gradient to zero at x0.
+
+    TVector zero{TVector::fromStdVector({0.0, 0.0, 0.0})};
+
+    TVector xmin{TVector::fromStdVector({1000.0, 1000.0, 1000.0})};
+    TVector x0{TVector::fromStdVector({1200.0, 1200.0, 1200.0})};
+
+    auto f = [&](const TVector& x) {
+        return static_cast<double>((x - xmin).transpose() * (x - xmin));
+    };
+    auto g = [&](const TVector& x) { return x == x0 ? zero : 2.0 * (x - xmin); };
+
+    maths::CLbfgs<TVector> lbfgs{10};
+
+    TVector x;
+    double fx;
+    std::tie(x, fx) = lbfgs.minimize(f, g, x0, 1e-20);
+
+    BOOST_TEST_REQUIRE((xmin - x).norm() < 1e-6);
+}
+
 BOOST_AUTO_TEST_SUITE_END()

From 4d908d31cb8154273f505e7a843b0d75044c6a71 Mon Sep 17 00:00:00 2001
From: Tom Veasey
Date: Tue, 19 Oct 2021 13:27:20 +0100
Subject: [PATCH 2/6] Docs

---
 docs/CHANGELOG.asciidoc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc
index 51296d71f7..d97b44f3e2 100644
--- a/docs/CHANGELOG.asciidoc
+++ b/docs/CHANGELOG.asciidoc
@@ -60,6 +60,11 @@
 * Make sure instrumentation captures the best hyperparameters we found for training
   classification and regression models. (See {ml-pull}2057[#2057].)
 
+=== Bug Fixes
+
+* Fix numerical instability in hyperparameter optimisation for training regression and
+  classification models. (See {ml-pull}2078[#2078].)
+
 == {es} version 7.15.0
 
 === Enhancements

From a7f0c5e2f4976fc4a01c72dc69e4adef306e5cff Mon Sep 17 00:00:00 2001
From: Tom Veasey
Date: Tue, 19 Oct 2021 14:36:12 +0100
Subject: [PATCH 3/6] Test thresholds

---
 include/maths/common/CLbfgs.h                    | 2 +-
 lib/maths/analytics/unittest/CBoostedTreeTest.cc | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/maths/common/CLbfgs.h b/include/maths/common/CLbfgs.h
index 897380058b..1fbf71b360 100644
--- a/include/maths/common/CLbfgs.h
+++ b/include/maths/common/CLbfgs.h
@@ -311,7 +311,7 @@ class CLbfgs {
                 return -step;
             }
         }
-        // We could find a direction in which the function decreases. Returning
+        // We couldn't find a direction in which the function decreases. Returning
         // zero means we'll exit on the convergence condition.
         return las::zero(m_X);
     }
diff --git a/lib/maths/analytics/unittest/CBoostedTreeTest.cc b/lib/maths/analytics/unittest/CBoostedTreeTest.cc
index f0471a3eed..1ca79890c1 100644
--- a/lib/maths/analytics/unittest/CBoostedTreeTest.cc
+++ b/lib/maths/analytics/unittest/CBoostedTreeTest.cc
@@ -632,7 +632,7 @@ BOOST_AUTO_TEST_CASE(testNonLinear) {
             0.0, modelBias[i][0],
             4.0 * std::sqrt(noiseVariance / static_cast<double>(trainRows)));
         // Good R^2...
-        BOOST_TEST_REQUIRE(modelRSquared[i][0] > 0.97);
+        BOOST_TEST_REQUIRE(modelRSquared[i][0] > 0.96);
 
         meanModelRSquared.add(modelRSquared[i][0]);
     }
@@ -701,7 +701,7 @@ BOOST_AUTO_TEST_CASE(testHuber) {
     }
     LOG_DEBUG(<< "mean R^2 = " << maths::common::CBasicStatistics::mean(meanModelRSquared));
-    BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanModelRSquared) > 0.97);
+    BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanModelRSquared) > 0.96);
 }
 
 BOOST_AUTO_TEST_CASE(testMsle) {

From 79400ff5f6e545933c2953a2f9c41dd03aa380dd Mon Sep 17 00:00:00 2001
From: Tom Veasey
Date: Tue, 19 Oct 2021 15:44:41 +0100
Subject: [PATCH 4/6] Build fix

---
 lib/maths/common/unittest/CLbfgsTest.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/maths/common/unittest/CLbfgsTest.cc b/lib/maths/common/unittest/CLbfgsTest.cc
index 4c82f5dc19..d207aeb5ee 100644
--- a/lib/maths/common/unittest/CLbfgsTest.cc
+++ b/lib/maths/common/unittest/CLbfgsTest.cc
@@ -246,7 +246,7 @@ BOOST_AUTO_TEST_CASE(testMinimizeWithVerySmallGradient) {
     };
     auto g = [&](const TVector& x) { return 2.0 * eps * (x - xmin); };
 
-    maths::CLbfgs<TVector> lbfgs{10};
+    maths::common::CLbfgs<TVector> lbfgs{10};
 
     TVector x;
     double fx;
@@ -269,7 +269,7 @@ BOOST_AUTO_TEST_CASE(testMinimizeWithInitialZeroGradient) {
     };
     auto g = [&](const TVector& x) { return x == x0 ? zero : 2.0 * (x - xmin); };
 
-    maths::CLbfgs<TVector> lbfgs{10};
+    maths::common::CLbfgs<TVector> lbfgs{10};
 
     TVector x;
     double fx;

From c2e1cc1c8ed848d0d35fbe4f7e58885eb8fd3454 Mon Sep 17 00:00:00 2001
From: Tom Veasey
Date: Tue, 19 Oct 2021 17:52:29 +0100
Subject: [PATCH 5/6] Test thresholds for ARM

---
 lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc | 2 +-
 lib/maths/analytics/unittest/CBoostedTreeTest.cc   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc b/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc
index 11d13e2dc5..6a380162b7 100644
--- a/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc
+++ b/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc
@@ -573,7 +573,7 @@ BOOST_AUTO_TEST_CASE(testRunBoostedTreeRegressionTrainingWithRowsMissingTargetVa
         BOOST_REQUIRE_CLOSE_ABSOLUTE(
             expected,
             result["row_results"]["results"]["ml"]["target_prediction"].GetDouble(),
-            0.15 * expected);
+            0.2 * expected);
         BOOST_REQUIRE_EQUAL(
             index < 40, result["row_results"]["results"]["ml"]["is_training"].GetBool());
diff --git a/lib/maths/analytics/unittest/CBoostedTreeTest.cc b/lib/maths/analytics/unittest/CBoostedTreeTest.cc
index 1ca79890c1..427bd9a1a0 100644
--- a/lib/maths/analytics/unittest/CBoostedTreeTest.cc
+++ b/lib/maths/analytics/unittest/CBoostedTreeTest.cc
@@ -695,7 +695,7 @@ BOOST_AUTO_TEST_CASE(testHuber) {
             0.0, modelBias[i],
             4.0 * std::sqrt(noiseVariance / static_cast<double>(trainRows)));
         // Good R^2...
-        BOOST_TEST_REQUIRE(modelRSquared[i] > 0.96);
+        BOOST_TEST_REQUIRE(modelRSquared[i] > 0.95);
 
         meanModelRSquared.add(modelRSquared[i]);
     }
@@ -766,7 +766,7 @@ BOOST_AUTO_TEST_CASE(testLowTrainFractionPerFold) {
 
     // Unbiased...
     BOOST_REQUIRE_CLOSE_ABSOLUTE(
-        0.0, bias, 4.0 * std::sqrt(noiseVariance / static_cast<double>(trainRows)));
+        0.0, bias, 7.0 * std::sqrt(noiseVariance / static_cast<double>(trainRows)));
     // Good R^2...
     BOOST_TEST_REQUIRE(rSquared > 0.98);
 }

From bfb10143a16198d1ab10ce00519102c18614c80a Mon Sep 17 00:00:00 2001
From: Tom Veasey
Date: Tue, 19 Oct 2021 18:11:03 +0100
Subject: [PATCH 6/6] Comment

---
 include/maths/common/CLbfgs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/maths/common/CLbfgs.h b/include/maths/common/CLbfgs.h
index 1fbf71b360..e6d8940806 100644
--- a/include/maths/common/CLbfgs.h
+++ b/include/maths/common/CLbfgs.h
@@ -251,7 +251,7 @@ class CLbfgs {
             fs = f(xs);
         }
 
-        // This can happen if |s m_P| < epsilon |m_X|.
+        // This can happen if s |m_P| < epsilon |m_X|.
         if (las::distance(xs, m_X) == 0.0) {
             this->fullLineSearch(f, xs, fs);
         }
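
Postscript: a standalone sketch of the zero-gradient fallback these patches add, for trying it outside the unit test suite. This is not part of the patch series: it mirrors testMinimizeWithInitialZeroGradient from PATCH 1/6, and the TVector alias plus the CLinearAlgebraEigen.h include are assumptions about what the test fixture provides (everything else, the minimize call, the f and g lambdas, the rank and tolerances, is taken directly from the diffs above).

    // Drive CLbfgs through the zero-gradient path: g lies at the start point
    // x0, so the first search direction is zero and the solver must fall back
    // to random probing instead of dividing by the zero norm of m_P.
    #include <maths/common/CLbfgs.h>
    #include <maths/common/CLinearAlgebraEigen.h> // assumed home of the vector alias

    #include <iostream>
    #include <tuple>

    // Assumption: the unit tests alias their vector type roughly like this.
    using TVector = ml::maths::common::CDenseVector<double>;

    int main() {
        TVector zero{TVector::fromStdVector({0.0, 0.0, 0.0})};
        TVector xmin{TVector::fromStdVector({1000.0, 1000.0, 1000.0})};
        TVector x0{TVector::fromStdVector({1200.0, 1200.0, 1200.0})};

        // Quadratic objective with its minimum at xmin.
        auto f = [&](const TVector& x) {
            return static_cast<double>((x - xmin).transpose() * (x - xmin));
        };
        // The true gradient everywhere except x0, where we force it to zero.
        auto g = [&](const TVector& x) {
            return x == x0 ? zero : TVector{2.0 * (x - xmin)};
        };

        ml::maths::common::CLbfgs<TVector> lbfgs{10};

        TVector x;
        double fx;
        std::tie(x, fx) = lbfgs.minimize(f, g, x0, 1e-20);

        // With PATCH 1/6 applied this prints a distance near zero; before it,
        // the line search divided by the zero gradient norm on iteration one.
        std::cout << "|x - xmin| = " << (xmin - x).norm() << ", f(x) = " << fx << "\n";
    }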