diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 9c8f936e55..23b67e1d94 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -42,6 +42,8 @@ classification and regression models. (See {ml-pull}2251[#2251].) * Fix possible source of "x = NaN, distribution = class boost::math::normal_distribution<..." log errors training classification and regression models. (See {ml-pull}2249[#2249].) +* Fix some bugs affecting the decision to stop optimising hyperparameters for training + classification and regression models. (See {ml-pull}2259[#2259].) == {es} version 8.2.0 diff --git a/include/maths/common/CBayesianOptimisation.h b/include/maths/common/CBayesianOptimisation.h index 81bf6f25c2..f4719bfda3 100644 --- a/include/maths/common/CBayesianOptimisation.h +++ b/include/maths/common/CBayesianOptimisation.h @@ -125,9 +125,6 @@ class MATHS_COMMON_EXPORT CBayesianOptimisation { //! of the total variance. TDoubleDoublePrVec anovaMainEffects() const; - //! Set kernel \p parameters explicitly. - void kernelParameters(const TVector& parameters); - //! Get the memory used by this object. std::size_t memoryUsage() const; @@ -142,6 +139,9 @@ class MATHS_COMMON_EXPORT CBayesianOptimisation { //! \name Test Interface //@{ + //! Set kernel \p parameters explicitly. + void kernelParameters(const TVector& parameters); + //! Get minus the data likelihood and its gradient as a function of the kernel //! hyperparameters. std::pair<TLikelihoodFunc, TLikelihoodGradientFunc> minusLikelihoodAndGradient() const; @@ -189,8 +189,9 @@ class MATHS_COMMON_EXPORT CBayesianOptimisation { TVectorDoublePr kernelCovariates(const TVector& a, const TVector& x, double vx) const; double kernel(const TVector& a, const TVector& x, const TVector& y) const; TVector kinvf() const; - TVector transformTo01(const TVector& x) const; double dissimilarity(const TVector& x) const; + TVector to01(TVector x) const; + TVector from01(TVector x) const; void checkRestoredInvariants() const; private: diff --git a/lib/maths/analytics/unittest/CBoostedTreeTest.cc b/lib/maths/analytics/unittest/CBoostedTreeTest.cc index 60cd3ae7bd..8d3ec34b08 100644 --- a/lib/maths/analytics/unittest/CBoostedTreeTest.cc +++ b/lib/maths/analytics/unittest/CBoostedTreeTest.cc @@ -569,7 +569,7 @@ BOOST_AUTO_TEST_CASE(testLinear) { 0.0, modelBias[i][0], 6.0 * std::sqrt(noiseVariance / static_cast<double>(trainRows))); // Good R^2...
- BOOST_TEST_REQUIRE(modelRSquared[i][0] > 0.97); + BOOST_TEST_REQUIRE(modelRSquared[i][0] > 0.94); meanModelRSquared.add(modelRSquared[i][0]); } @@ -879,7 +879,7 @@ BOOST_AUTO_TEST_CASE(testLowCardinalityFeatures) { target, noiseVariance / static_cast<double>(rows)); LOG_DEBUG(<< "bias = " << bias << ", rSquared = " << rSquared); - BOOST_TEST_REQUIRE(rSquared > 0.96); + BOOST_TEST_REQUIRE(rSquared > 0.95); } BOOST_AUTO_TEST_CASE(testLowTrainFractionPerFold) { @@ -1189,7 +1189,7 @@ BOOST_AUTO_TEST_CASE(testCategoricalRegressors) { LOG_DEBUG(<< "bias = " << modelBias); LOG_DEBUG(<< " R^2 = " << modelRSquared); BOOST_REQUIRE_CLOSE_ABSOLUTE(0.0, modelBias, 0.1); - BOOST_TEST_REQUIRE(modelRSquared > 0.98); + BOOST_TEST_REQUIRE(modelRSquared > 0.97); } BOOST_AUTO_TEST_CASE(testFeatureBags) { diff --git a/lib/maths/common/CBayesianOptimisation.cc b/lib/maths/common/CBayesianOptimisation.cc index fc6411853d..577dafe1d6 100644 --- a/lib/maths/common/CBayesianOptimisation.cc +++ b/lib/maths/common/CBayesianOptimisation.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,7 @@ #include #include +#include #include #include @@ -61,6 +63,16 @@ const std::string RNG_TAG{"rng"}; // narrow deep valley that the Gaussian Process hasn't sampled. const double MINIMUM_KERNEL_SCALE_FOR_EXPECTATION_MAXIMISATION{1e-8}; +//! Affine transform \p scale * (\p fx - \p shift). +double toScaled(double shift, double scale, double fx) { + return scale * (fx - shift); +} + +//! Affine transform \p shift + \p fx / \p scale. +double fromScaled(double shift, double scale, double fx) { + return shift + fx / scale; +} + //! A version of the normal c.d.f. which is stable across our target platforms. double stableNormCdf(double z) { return CTools::stable(CTools::safeCdf(boost::math::normal{0.0, 1.0}, z)); } @@ -114,9 +126,32 @@ CBayesianOptimisation::CBayesianOptimisation(core::CStateRestoreTraverser& trave } void CBayesianOptimisation::add(TVector x, double fx, double vx) { - m_FunctionMeanValues.emplace_back(x.cwiseQuotient(m_MaxBoundary - m_MinBoundary), - m_RangeScale * (fx - m_RangeShift)); - m_ErrorVariances.push_back(CTools::pow2(m_RangeScale) * vx); + if (CMathsFuncs::isFinite(fx) == false || CMathsFuncs::isFinite(vx) == false) { + LOG_ERROR(<< "Discarding point (" << x.transpose() << "," << fx << "," << vx << ")"); + return; + } + + x = this->to01(std::move(x)); + fx = toScaled(m_RangeShift, m_RangeScale, fx); + vx = CTools::pow2(m_RangeScale) * vx; + + std::size_t duplicate(std::find_if(m_FunctionMeanValues.begin(), + m_FunctionMeanValues.end(), + [&](const auto& value) { + return (x - value.first).norm() == 0.0; + }) - + m_FunctionMeanValues.begin()); + if (duplicate < m_FunctionMeanValues.size()) { + auto& f = m_FunctionMeanValues[duplicate].second; + auto& v = m_ErrorVariances[duplicate]; + auto moments = CBasicStatistics::momentsAccumulator(1.0, f, v) + + CBasicStatistics::momentsAccumulator(1.0, fx, vx); + f = CBasicStatistics::mean(moments); + v = CBasicStatistics::maximumLikelihoodVariance(moments); + } else { + m_FunctionMeanValues.emplace_back(std::move(x), fx); + m_ErrorVariances.push_back(vx); + } } void CBayesianOptimisation::explainedErrorVariance(double vx) { @@ -146,8 +181,8 @@ CBayesianOptimisation::maximumExpectedImprovement(double negligibleExpectedImpro TDoubleVec interpolates; CSampling::uniformSample(m_Rng, 0.0, 1.0, 10 * m_Restarts * interpolate.size(), interpolates); - TVector a{m_MinBoundary.cwiseQuotient(m_MaxBoundary - m_MinBoundary)}; - TVector
b{m_MaxBoundary.cwiseQuotient(m_MaxBoundary - m_MinBoundary)}; + TVector a{TVector::Zero(m_MinBoundary.size())}; + TVector b{TVector::Ones(m_MaxBoundary.size())}; TVector x; TMeanAccumulator rho_; TMinAccumulator probes{m_Restarts}; @@ -217,7 +252,7 @@ CBayesianOptimisation::maximumExpectedImprovement(double negligibleExpectedImpro expectedImprovement = fmax / m_RangeScale; } - xmax = xmax.cwiseProduct(m_MaxBoundary - m_MinBoundary); + xmax = this->from01(std::move(xmax)); LOG_TRACE(<< "best = " << xmax.transpose() << " EI(best) = " << expectedImprovement); return {std::move(xmax), expectedImprovement}; @@ -230,9 +265,8 @@ double CBayesianOptimisation::evaluate(const TVector& input) const { double CBayesianOptimisation::evaluate(const TVector& Kinvf, const TVector& input) const { TVector Kxn; std::tie(Kxn, std::ignore) = this->kernelCovariates( - m_KernelParameters, input.cwiseQuotient(m_MaxBoundary - m_MinBoundary), - this->meanErrorVariance()); - return Kxn.transpose() * Kinvf; + m_KernelParameters, this->to01(input), this->meanErrorVariance()); + return fromScaled(m_RangeShift, m_RangeScale, Kxn.transpose() * Kinvf); } double CBayesianOptimisation::evaluate1D(const TVector& Kinvf, double input, int dimension) const { @@ -250,20 +284,29 @@ double CBayesianOptimisation::evaluate1D(const TVector& Kinvf, double input, int input = (input - m_MinBoundary(dimension)) / (m_MaxBoundary(dimension) - m_MinBoundary(dimension)); for (std::size_t i = 0; i < m_FunctionMeanValues.size(); ++i) { - TVector x{this->transformTo01(m_FunctionMeanValues[i].first)}; + const TVector& x{m_FunctionMeanValues[i].first}; sum += Kinvf(static_cast(i)) * CTools::stableExp(-(CTools::pow2(m_KernelParameters[dimension + 1]) + MINIMUM_KERNEL_COORDINATE_DISTANCE_SCALE) * CTools::pow2(input - x(dimension))) * prodXt(x, dimension); } - - // We rewrite theta_0^2 sum - f_0 as (theta + f) * (theta - f) where - // theta = theta_0 sum^(1/2) and f = f_0^(1/2) because it has better - // numerics. - double theta{m_KernelParameters(0) * std::sqrt(sum)}; - double f{std::sqrt(this->anovaConstantFactor(Kinvf))}; - return (theta + f) * (theta - f); + double f2{this->anovaConstantFactor(Kinvf)}; + + // We only get cancellation if the signs are the same (and we need also + // to take the square root of both sum and f2 for which they need to be + // positive). + if (std::signbit(sum) == std::signbit(f2)) { + // We rewrite theta_0^2 sum - f_0 as (theta + f) * (theta - f) where + // theta = theta_0 sum^(1/2) and f = f_0^(1/2) because it has better + // numerics. 
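// A minimal standalone sketch of the sign-aware factoring used in this branch: when
// sum and f2 share a sign, theta_0^2 * sum - f2 is evaluated as (theta + f) * (theta - f)
// with theta = theta_0 * sqrt(|sum|) and f = sqrt(|f2|), and the sign is restored with
// copysign. The function name and the values in main are illustrative only, not part
// of the library.
#include <cmath>
#include <iostream>

double signAwareDifference(double theta0, double sum, double f2) {
    if (std::signbit(sum) == std::signbit(f2)) {
        // Factored form: avoids squaring theta0 and then subtracting two large,
        // nearly equal terms when theta0^2 * |sum| is close to |f2|.
        double theta{theta0 * std::sqrt(std::fabs(sum))};
        double f{std::sqrt(std::fabs(f2))};
        return std::copysign(1.0, sum) * (theta + f) * (theta - f);
    }
    // Different signs: no cancellation, so the direct form is fine.
    return theta0 * theta0 * sum - f2;
}

int main() {
    // Example where theta0^2 * sum and f2 agree to many digits.
    std::cout << signAwareDifference(1.0e3, 1.0 + 1.0e-10, 1.0e6) << '\n';
    return 0;
}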
+ double theta{m_KernelParameters(0) * std::sqrt(std::fabs(sum))}; + double f{std::sqrt(std::fabs(f2))}; + return fromScaled(m_RangeShift, m_RangeScale, + std::copysign(1.0, sum) * (theta + f) * (theta - f)); + } + return fromScaled(m_RangeShift, m_RangeScale, + CTools::pow2(m_KernelParameters(0)) * sum - f2); } double CBayesianOptimisation::evaluate1D(double input, int dimension) const { @@ -279,7 +322,7 @@ double CBayesianOptimisation::anovaConstantFactor(const TVector& Kinvf) const { double sum{0.0}; for (std::size_t i = 0; i < m_FunctionMeanValues.size(); ++i) { double prod{1.0}; - TVector x{this->transformTo01(m_FunctionMeanValues[i].first)}; + const TVector& x{m_FunctionMeanValues[i].first}; for (int d = 0; d < x.size(); ++d) { prod *= integrate1dKernel(m_KernelParameters(d + 1), x(d)); } @@ -294,8 +337,8 @@ double CBayesianOptimisation::anovaConstantFactor() const { double CBayesianOptimisation::anovaTotalVariance(const TVector& Kinvf) const { auto prodIj = [&Kinvf, this](std::size_t i, std::size_t j) -> double { - TVector xi{this->transformTo01(m_FunctionMeanValues[i].first)}; - TVector xj{this->transformTo01(m_FunctionMeanValues[j].first)}; + const TVector& xi{m_FunctionMeanValues[i].first}; + const TVector& xj{m_FunctionMeanValues[j].first}; double prod{1.0}; for (int d = 0; d < xi.size(); ++d) { prod *= integrate1dKernelProduct(m_KernelParameters[d + 1], xi(d), xj(d)); @@ -311,17 +354,22 @@ double CBayesianOptimisation::anovaTotalVariance(const TVector& Kinvf) const { } } - // We rewrite theta_0^4 sum - f_0^2 as (theta^2 + f_0) * (theta^2 - f_0) - // where theta^2 = theta_0^2 sum^(1/2) because it has better numerics. - double theta2{CTools::pow2(m_KernelParameters(0)) * std::sqrt(sum)}; + double theta2{CTools::pow2(m_KernelParameters(0))}; double f0{this->anovaConstantFactor(Kinvf)}; - double variance{(theta2 + f0) * (theta2 - f0)}; - return std::max(0.0, variance); + double scale2{CTools::pow2(m_RangeScale)}; + if (sum > 0.0) { + // We rewrite theta_0^4 sum - f_0^2 as (theta^2 + f_0) * (theta^2 - f_0) + // where theta^2 = theta_0^2 sum^(1/2) because it has better numerics. 
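// To see the identity used here: with theta^2 = theta_0^2 * sqrt(sum) we have
// (theta^2)^2 = theta_0^4 * sum, so (theta^2 + f_0) * (theta^2 - f_0) equals
// theta_0^4 * sum - f_0^2 without forming the two large terms separately first.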
+ theta2 *= std::sqrt(sum); + double variance{(theta2 + f0) * (theta2 - f0)}; + return std::max(0.0, variance / scale2); + } + return std::max(0.0, (theta2 * theta2 * sum - f0 * f0) / scale2); } double CBayesianOptimisation::anovaTotalCoefficientOfVariation() { this->precondition(); - return std::sqrt(this->anovaTotalVariance()) * m_RangeScale / m_RangeShift; + return std::sqrt(this->anovaTotalVariance()) / m_RangeShift; } double CBayesianOptimisation::anovaTotalVariance() const { @@ -341,9 +389,9 @@ double CBayesianOptimisation::anovaMainEffect(const TVector& Kinvf, int dimensio double sum1{0.0}; double sum2{0.0}; for (std::size_t i = 0; i < m_FunctionMeanValues.size(); ++i) { - TVector xi{this->transformTo01(m_FunctionMeanValues[i].first)}; + const TVector& xi{m_FunctionMeanValues[i].first}; for (std::size_t j = 0; j < m_FunctionMeanValues.size(); ++j) { - TVector xj{this->transformTo01(m_FunctionMeanValues[j].first)}; + const TVector& xj{m_FunctionMeanValues[j].first}; sum1 += Kinvf(static_cast(i)) * Kinvf(static_cast(j)) * prodXt(xi, dimension) * prodXt(xj, dimension) * integrate1dKernelProduct(m_KernelParameters(dimension + 1), @@ -353,12 +401,11 @@ double CBayesianOptimisation::anovaMainEffect(const TVector& Kinvf, int dimensio integrate1dKernel(m_KernelParameters(dimension + 1), xi(dimension)) * prodXt(xi, dimension); } + double scale2{CTools::pow2(m_RangeScale)}; double theta02{CTools::pow2(m_KernelParameters(0))}; - double theta04{CTools::pow2(theta02)}; double f0{this->anovaConstantFactor()}; double f02{CTools::pow2(f0)}; - double scale{std::max(1.0, theta04)}; // prevent cancellation errors - return scale * (theta04 * sum1 / scale - 2 * theta02 * sum2 * f0 / scale + f02 / scale); + return (theta02 * (theta02 * sum1 - 2.0 * f0 * sum2) + f02) / scale2; } double CBayesianOptimisation::anovaMainEffect(int dimension) const { @@ -375,7 +422,7 @@ CBayesianOptimisation::TDoubleDoublePrVec CBayesianOptimisation::anovaMainEffect mainEffects.reserve(static_cast(m_MinBoundary.size())); TVector Kinvf{this->kinvf()}; double f0{this->anovaConstantFactor(Kinvf)}; - double totalVariance(this->anovaTotalVariance(Kinvf)); + double totalVariance{this->anovaTotalVariance(Kinvf)}; for (int i = 0; i < m_MinBoundary.size(); ++i) { double effect{this->anovaMainEffect(Kinvf, i)}; mainEffects.emplace_back(effect, effect / totalVariance); @@ -389,6 +436,8 @@ CBayesianOptimisation::TDoubleDoublePrVec CBayesianOptimisation::anovaMainEffect void CBayesianOptimisation::kernelParameters(const TVector& parameters) { if (m_KernelParameters.size() == parameters.size()) { m_KernelParameters = parameters; + m_RangeShift = 0.0; + m_RangeScale = 1.0; } } @@ -399,40 +448,45 @@ CBayesianOptimisation::minusLikelihoodAndGradient() const { double v{this->meanErrorVariance()}; TVector ones; TVector gradient; - Eigen::LDLT Kldl; + Eigen::ColPivHouseholderQR Kqr; TMatrix K; TVector Kinvf; TMatrix Kinv; TMatrix dKdai; + double eps{1e-4}; + + // We need to be careful when we compute the kernel decomposition. Basically, + // if the kernel matrix is singular to working precision then if the function + // value vector projection onto the null-space has non-zero length the likelihood + // function is effectively -infinity. This follow from the fact that although + // log(1 / lambda_i) -> +infinity, -1/2 sum_i{ ||f_i||^2 / lambda_i } -> -infinity + // faster for all ||f_i|| > 0 and lambda_i sufficiently small. Here {lambda_i} + // denote the Eigenvalues of the nullspace. 
We use a rank revealing decomposition + // and compute the likelihood on the row space. auto minusLogLikelihood = [=](const TVector& a) mutable -> double { - K = this->kernel(a, v); - Kldl.compute(K); - Kinvf.noalias() = f; - Kldl.solveInPlace(Kinvf); - - // We can only determine values up to eps * "max diagonal". If the diagonal - // has a zero it blows up the determinant term. In practice, we know the - // kernel can't be singular by construction so we perturb the diagonal by - // the numerical error in such a way as to recover a non-singular matrix. - // (Note that the solve routine deals with the zero for us.) - double eps{std::numeric_limits::epsilon() * Kldl.vectorD().maxCoeff()}; - return 0.5 * (f.transpose() * Kinvf + - Kldl.vectorD().cwiseMax(eps).array().log().sum()); + K = this->kernel(a, v + eps); + Kqr.compute(K); + Kinvf.noalias() = Kqr.solve(f); + // Note that Kqr.logAbsDeterminant() = -infinity if K is singular. + double logAbsDet{0.0}; + for (int i = 0; i < Kqr.rank(); ++i) { + logAbsDet += std::log(std::fabs(Kqr.matrixR()(i, i))); + } + logAbsDet = CTools::stable(logAbsDet); + return 0.5 * (f.transpose() * Kinvf + logAbsDet); }; auto minusLogLikelihoodGradient = [=](const TVector& a) mutable -> TVector { - K = this->kernel(a, v); - Kldl.compute(K); + K = this->kernel(a, v + eps); + Kqr.compute(K); - Kinvf.noalias() = f; - Kldl.solveInPlace(Kinvf); + Kinvf.noalias() = Kqr.solve(f); ones = TVector::Ones(f.size()); - Kinv.noalias() = TMatrix::Identity(f.size(), f.size()); - Kldl.solveInPlace(Kinv); + Kinv.noalias() = Kqr.solve(TMatrix::Identity(f.size(), f.size())); - K.diagonal() -= v * ones; + K.diagonal() -= (v + eps) * ones; gradient = TVector::Zero(a.size()); for (int i = 0; i < Kinvf.size(); ++i) { @@ -553,46 +607,57 @@ CBayesianOptimisation::minusExpectedImprovementAndGradient() const { const CBayesianOptimisation::TVector& CBayesianOptimisation::maximumLikelihoodKernel() { - // Use random restarts of L-BFGS to find maximum likelihood parameters. + if (m_FunctionMeanValues.size() < 2) { + return m_KernelParameters; + } - this->precondition(); + using TDoubleVecVec = std::vector; - std::size_t n(m_KernelParameters.size()); - - // We restart optimization with initial guess on different scales for global probing. - TDoubleVec scales; - scales.reserve(10 * (m_Restarts - 1) * n); - CSampling::uniformSample(m_Rng, CTools::stableLog(0.2), CTools::stableLog(5.0), - 10 * (m_Restarts - 1) * n, scales); + this->precondition(); TLikelihoodFunc l; TLikelihoodGradientFunc g; std::tie(l, g) = this->minusLikelihoodAndGradient(); + CLbfgs lbfgs{10}; + + double lmax{l(m_KernelParameters)}; + TVector amax{m_KernelParameters}; + + // Try the current values first. + double la; + TVector a; + std::tie(a, la) = lbfgs.minimize(l, g, m_KernelParameters, 1e-8, 75); + if (COrderings::lexicographical_compare(la, a.norm(), lmax, amax.norm())) { + lmax = la; + amax = a; + } + TMinAccumulator probes{m_Restarts - 1}; - TVector scale{n}; - for (std::size_t i = 0; i < scales.size(); /**/) { - TVector a{m_KernelParameters}; - for (std::size_t j = 0; j < n; ++i, ++j) { - scale(j) = CTools::stableExp(scales[i]); + // We restart optimization with scales of the current values for global probing. 
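// A standalone sketch of the restart strategy described in the comment above: each
// restart multiplies every coordinate of the current kernel parameters by a factor in
// [0.2, 2.0], obtained by mapping a point of a space-filling sequence in [0, 1]^n
// through linear interpolation in log space. Plain uniform random numbers stand in
// for the Sobol sequence here, and all names are illustrative.
#include <cmath>
#include <cstddef>
#include <iostream>
#include <random>
#include <vector>

std::vector<double> scaledRestart(const std::vector<double>& current,
                                  const std::vector<double>& unitPoint) {
    std::vector<double> restart(current.size());
    for (std::size_t j = 0; j < current.size(); ++j) {
        // Interpolate log(0.2) -> log(2.0) as unitPoint[j] runs over [0, 1].
        double logScale{std::log(0.2) + unitPoint[j] * (std::log(2.0) - std::log(0.2))};
        restart[j] = current[j] * std::exp(logScale);
    }
    return restart;
}

int main() {
    std::mt19937 rng{42};
    std::uniform_real_distribution<double> u01{0.0, 1.0};
    std::vector<double> kernelParameters{1.0, 0.5, 0.5, 0.5};
    for (int restart = 0; restart < 3; ++restart) {
        std::vector<double> unitPoint(kernelParameters.size());
        for (auto& x : unitPoint) {
            x = u01(rng);
        }
        for (double a : scaledRestart(kernelParameters, unitPoint)) {
            std::cout << a << ' ';
        }
        std::cout << '\n';
    }
    return 0;
}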
+ std::size_t n(m_KernelParameters.size()); + TDoubleVecVec scales; + scales.reserve(10 * (m_Restarts - 1)); + CSampling::sobolSequenceSample(n, 10 * (m_Restarts - 1), scales); + + for (const auto& scale : scales) { + a.noalias() = m_KernelParameters; + for (std::size_t j = 0; j < n; ++j) { + a(j) *= CTools::stableExp(CTools::linearlyInterpolate( + 0.0, 1.0, std::log(0.2), std::log(2.0), scale[j])); + } + la = l(a); + if (COrderings::lexicographical_compare(la, a.norm(), lmax, amax.norm())) { + lmax = la; + amax = a; } - a.array() *= scale.array(); - double la{l(a)}; probes.add({la, std::move(a)}); } - CLbfgs lbfgs{10}; - - double lmax; - TVector amax; - std::tie(amax, lmax) = lbfgs.minimize(l, g, m_KernelParameters, 1e-8, 75); - - double la; - TVector a; for (auto& a0 : probes) { std::tie(a, la) = lbfgs.minimize(l, g, std::move(a0.second), 1e-8, 75); - if (COrderings::lexicographical_compare(la, a, lmax, amax)) { + if (COrderings::lexicographical_compare(la, a.norm(), lmax, amax.norm())) { lmax = la; amax = std::move(a); } @@ -615,7 +680,7 @@ void CBayesianOptimisation::precondition() { // for different loss functions. for (auto& value : m_FunctionMeanValues) { - value.second = m_RangeShift + value.second / m_RangeScale; + value.second = fromScaled(m_RangeShift, m_RangeScale, value.second); } for (auto& variance : m_ErrorVariances) { variance /= CTools::pow2(m_RangeScale); @@ -633,7 +698,7 @@ void CBayesianOptimisation::precondition() { : 1.0 / std::sqrt(CBasicStatistics::variance(valueMoments)); for (auto& value : m_FunctionMeanValues) { - value.second = m_RangeScale * (value.second - m_RangeShift); + value.second = toScaled(m_RangeShift, m_RangeScale, value.second); } for (auto& variance : m_ErrorVariances) { variance *= CTools::pow2(m_RangeScale); @@ -737,10 +802,6 @@ CBayesianOptimisation::TVector CBayesianOptimisation::kinvf() const { return Kinvf; } -CBayesianOptimisation::TVector CBayesianOptimisation::transformTo01(const TVector& x) const { - return x - m_MinBoundary.cwiseQuotient(m_MaxBoundary - m_MinBoundary); -} - double CBayesianOptimisation::dissimilarity(const TVector& x) const { // This is used as a fallback when GP is very unsure we can actually make progress, // i.e. EI is miniscule. In this case we fallback to a different strategy to break @@ -761,6 +822,17 @@ double CBayesianOptimisation::dissimilarity(const TVector& x) const { return sum / static_cast(m_FunctionMeanValues.size()) + min; } +CBayesianOptimisation::TVector CBayesianOptimisation::to01(TVector x) const { + // Self assign so operations are performed inplace. + x = (x - m_MinBoundary).cwiseQuotient(m_MaxBoundary - m_MinBoundary); + return x; +} + +CBayesianOptimisation::TVector CBayesianOptimisation::from01(TVector x) const { + x = m_MinBoundary + x.cwiseProduct(m_MaxBoundary - m_MinBoundary); + return x; +} + void CBayesianOptimisation::acceptPersistInserter(core::CStatePersistInserter& inserter) const { try { core::CPersistUtils::persist(VERSION_7_5_TAG, "", inserter); diff --git a/lib/maths/common/unittest/CBayesianOptimisationTest.cc b/lib/maths/common/unittest/CBayesianOptimisationTest.cc index ff9ef01e77..4c1c93cf14 100644 --- a/lib/maths/common/unittest/CBayesianOptimisationTest.cc +++ b/lib/maths/common/unittest/CBayesianOptimisationTest.cc @@ -9,6 +9,7 @@ * limitation. 
*/ +#include #include #include @@ -23,6 +24,7 @@ #include +#include #include BOOST_AUTO_TEST_SUITE(CBayesianOptimisationTest) @@ -33,6 +35,15 @@ namespace { using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; using TVector = maths::common::CDenseVector; +using TVectorVec = std::vector; +using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; +struct SFunctionParams { + double s_Xl; + double s_Xu; + double s_F0; + double s_Scale; +}; +using TFunctionParamsVec = std::vector; TVector vector(TDoubleVec components) { TVector result(components.size()); @@ -176,16 +187,14 @@ BOOST_AUTO_TEST_CASE(testMaximumLikelihoodKernel) { for (std::size_t test = 0; test < 50; ++test) { maths::common::CBayesianOptimisation bopt{ - {{-10.0, 10.0}, {-10.0, 10.0}, {-10.0, 10.0}, {-10.0, 10.0}}}; - - double scale{5.0 / std::sqrt(static_cast(test + 1))}; + {{0.0, 10.0}, {0.0, 10.0}, {0.0, 10.0}, {0.0, 10.0}}}; for (std::size_t i = 0; i < 10; ++i) { rng.generateUniformSamples(-10.0, 10.0, 4, coordinates); rng.generateNormalSamples(0.0, 2.0, 1, noise); TVector x{vector(coordinates)}; - double fx{scale * x.squaredNorm() + noise[0]}; - bopt.add(x, fx, 1.0); + double fx{x.squaredNorm() + noise[0]}; + bopt.add(x, fx, 0.1); } TVector parameters{bopt.maximumLikelihoodKernel()}; @@ -202,9 +211,9 @@ BOOST_AUTO_TEST_CASE(testMaximumLikelihoodKernel) { TVector eps{parameters.size()}; eps.setZero(); for (std::size_t i = 0; i < 4; ++i) { - eps(i) = 0.1; + eps(i) = 0.01; double minusLPlusEps{l(parameters + eps)}; - eps(i) = -0.1; + eps(i) = -0.01; double minusLMinusEps{l(parameters + eps)}; eps(i) = 0.0; BOOST_TEST_REQUIRE(minusML < minusLPlusEps); @@ -268,8 +277,6 @@ BOOST_AUTO_TEST_CASE(testMaximumExpectedImprovement) { // We check the value of the function we find after fixed number of iterations // vs a random search baseline. - using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; - test::CRandomNumbers rng; TDoubleVec centreCoordinates; TDoubleVec coordinateScales; @@ -359,14 +366,130 @@ BOOST_AUTO_TEST_CASE(testMaximumExpectedImprovement) { << 100.0 * maths::common::CBasicStatistics::mean(meanImprovementRs)); BOOST_TEST_REQUIRE(wins > static_cast(0.95 * 50)); // 95% better BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanImprovementBopt) > - 1.6 * maths::common::CBasicStatistics::mean(meanImprovementRs)); // 60% mean improvement + 1.5 * maths::common::CBasicStatistics::mean(meanImprovementRs)); // 50% mean improvement +} + +BOOST_AUTO_TEST_CASE(testKernelInvariants) { + + // Test that the kernel parameters we estimate do not change when: + // 1. Changing the function domain, + // 2. Changing the function level, + // 3. Linearly scaling the function. 
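// A small standalone sketch of why these invariants can hold: observations are mapped
// to the unit cube, and function values are standardised with shift = mean and
// scale = 1 / standard deviation, so applying f -> a * f + b or stretching the domain
// leaves the values the Gaussian process actually fits unchanged. The helper below is
// illustrative only and assumes that shift/scale convention.
#include <cmath>
#include <iostream>
#include <vector>

std::vector<double> standardise(std::vector<double> values) {
    double mean{0.0};
    for (double v : values) {
        mean += v;
    }
    mean /= static_cast<double>(values.size());
    double variance{0.0};
    for (double v : values) {
        variance += (v - mean) * (v - mean);
    }
    variance /= static_cast<double>(values.size());
    double scale{variance > 0.0 ? 1.0 / std::sqrt(variance) : 1.0};
    for (double& v : values) {
        v = scale * (v - mean);
    }
    return values;
}

int main() {
    std::vector<double> f{1.0, 2.0, 4.0, 8.0};
    std::vector<double> g{f};
    for (double& v : g) {
        v = 2.0 * v + 10.0; // Shift the level and linearly scale.
    }
    // Both loops print the same standardised values.
    for (double v : standardise(f)) {
        std::cout << v << ' ';
    }
    std::cout << '\n';
    for (double v : standardise(g)) {
        std::cout << v << ' ';
    }
    std::cout << '\n';
    return 0;
}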
+ + TFunctionParamsVec tests{{0.0, 100.0, 0.0, 1.0}, + {0.0, 1000.0, 0.0, 1.0}, + {0.0, 100.0, 10.0, 1.0}, + {0.0, 100.0, 0.0, 2.0}}; + + TVectorVec kernelParameters; + + for (const auto& test : tests) { + + test::CRandomNumbers rng; + + std::size_t dimension{2}; + double xl{test.s_Xl}; + double xu{test.s_Xu}; + double f0{test.s_F0}; + double scale{test.s_Scale}; + + TDoubleVec coords; + rng.generateUniformSamples(xl, xu, dimension * 20, coords); + + maths::common::CBayesianOptimisation::TDoubleDoublePrVec bb; + for (std::size_t i = 0; i < dimension; ++i) { + bb.emplace_back(xl, xu); + } + + maths::common::CBayesianOptimisation bopt{bb}; + for (std::size_t i = 0; i < 10; ++i) { + TVector x{dimension}; + for (std::size_t j = 0; j < dimension; ++j) { + x(j) = coords[i * dimension + j]; + } + bopt.maximumLikelihoodKernel(); + bopt.add(x, scale * x.norm() + f0, scale * scale * (xu - xl) * (xu - xl) * 0.0001); + } + + kernelParameters.push_back(bopt.maximumLikelihoodKernel()); + } + + for (std::size_t i = 1; i < kernelParameters.size(); ++i) { + BOOST_TEST_REQUIRE((kernelParameters[i] - kernelParameters[0]).norm() < 1e-6); + } +} + +BOOST_AUTO_TEST_CASE(testForSingularKernel) { + + // Explore some edge cases where the kernel can go singular. + + // Test that decreasing additive variance. In this case the maximum likelihood + // can decide it is a good idea to force a singular kernel matrix if we don't + // compute the likelihood carefully. We should see the kernel parameters smoothly + // converge towards the case the additive variance is zero as we reduce it. + TVectorVec kernels; + std::size_t dimension{3}; + std::size_t n{30}; + double xl{-100.0}; + double xu{100.0}; + for (auto v : {0.1, 0.01, 0.001, 0.00001, 0.0}) { + test::CRandomNumbers rng; + TDoubleVec coords; + rng.generateUniformSamples(xl, xu, dimension * n, coords); + + maths::common::CBayesianOptimisation::TDoubleDoublePrVec bb; + for (std::size_t i = 0; i < dimension; ++i) { + bb.emplace_back(xl, xu); + } + + maths::common::CBayesianOptimisation bopt{bb}; + for (std::size_t i = 0; i < n; ++i) { + TVector x{dimension}; + for (std::size_t j = 0; j < dimension; ++j) { + x(j) = coords[i * dimension + j]; + } + bopt.maximumLikelihoodKernel(); + bopt.add(x, x.norm(), v); + } + + auto kernel = bopt.maximumLikelihoodKernel(); + LOG_DEBUG(<< "kernel = " << kernel.transpose()); + kernels.push_back(kernel); + } + + double lastNorm{std::numeric_limits::max()}; + for (std::size_t i = 0; i + 1 < kernels.size(); ++i) { + double norm{(kernels[kernels.size() - 1] - kernels[i]).norm()}; + BOOST_TEST_REQUIRE(norm < lastNorm); + BOOST_TEST_REQUIRE(norm / kernels[i].norm() < 0.05); + lastNorm = norm; + } + + // Adding a duplicate point would create a singular kernel if we didn't explicitly + // deduplicate. + maths::common::CBayesianOptimisation::TDoubleDoublePrVec bb{{0.0, 1.0}}; + maths::common::CBayesianOptimisation bopt{bb}; + for (auto x : {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9}) { + bopt.add(x * TVector::Ones(1), 1.0 + x, 0.0); + bopt.maximumLikelihoodKernel(); + } + auto kernelBefore = bopt.maximumLikelihoodKernel(); + LOG_DEBUG(<< "kernel before duplicate = " << kernelBefore.transpose()); + + bopt.add(0.1 * TVector::Ones(1), 1.0 + 0.1, 0.0); + auto kernelAfter = bopt.maximumLikelihoodKernel(); + LOG_DEBUG(<< "kernel after duplicate = " << kernelAfter.transpose()); + + // Check that the decay rate is not significantly changed. 
+ BOOST_TEST_REQUIRE(std::fabs(kernelAfter(1) - kernelBefore(1)) < + 0.001 * std::fabs(kernelBefore(1))); } BOOST_AUTO_TEST_CASE(testPersistRestore) { // 1d { - TDoubleVec minBoundary{0.}; - TDoubleVec maxBoundary{10.}; + TDoubleVec minBoundary{0.0}; + TDoubleVec maxBoundary{10.0}; // empty { std::vector parameterFunctionValues{}; @@ -375,8 +498,8 @@ BOOST_AUTO_TEST_CASE(testPersistRestore) { // with data { std::vector parameterFunctionValues{ - {5., 1., 0.2}, - {7., 1., 0.2}, + {5.0, 1.0, 0.2}, + {7.0, 1.0, 0.2}, }; testPersistRestoreIsIdempotent(minBoundary, maxBoundary, parameterFunctionValues); } @@ -384,8 +507,8 @@ BOOST_AUTO_TEST_CASE(testPersistRestore) { // 2d { - TDoubleVec minBoundary{0., -1.}; - TDoubleVec maxBoundary{10., 1.}; + TDoubleVec minBoundary{0.0, -1.0}; + TDoubleVec maxBoundary{10.0, 1.0}; // empty { std::vector parameterFunctionValues{}; @@ -394,8 +517,8 @@ BOOST_AUTO_TEST_CASE(testPersistRestore) { // with data { std::vector parameterFunctionValues{ - {5., 0., 1., 0.2}, - {7., 0., 1., 0.2}, + {5.0, 0.0, 1.0, 0.2}, + {7.0, 0.0, 1.0, 0.2}, }; testPersistRestoreIsIdempotent(minBoundary, maxBoundary, parameterFunctionValues); } @@ -433,7 +556,6 @@ BOOST_AUTO_TEST_CASE(testEvaluate) { } BOOST_AUTO_TEST_CASE(testEvaluate1D) { - using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; test::CRandomNumbers rng; std::size_t dim{2}; std::size_t mcSamples{100}; @@ -460,8 +582,6 @@ BOOST_AUTO_TEST_CASE(testEvaluate1D) { } BOOST_AUTO_TEST_CASE(testAnovaConstantFactor) { - using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; - std::size_t dim{2}; std::size_t mcSamples{1000}; TDoubleVecVec testSamples; @@ -484,7 +604,6 @@ BOOST_AUTO_TEST_CASE(testAnovaConstantFactor) { } BOOST_AUTO_TEST_CASE(testAnovaTotalVariance) { - using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; std::size_t dim{2}; std::size_t mcSamples{1000}; TDoubleVecVec testSamples; @@ -508,7 +627,6 @@ BOOST_AUTO_TEST_CASE(testAnovaTotalVariance) { } BOOST_AUTO_TEST_CASE(testAnovaMainEffect) { - using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; std::size_t dim{2}; std::size_t mcSamples{1000}; TDoubleVecVec testSamples; @@ -533,4 +651,84 @@ BOOST_AUTO_TEST_CASE(testAnovaMainEffect) { verify(0.2, 0.8); } +BOOST_AUTO_TEST_CASE(testAnovaInvariants) { + + // Test that the various parts of ANOVA change as we expect when: + // 1. Changing the function level, + // 2. Linearly scaling the function. 
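// A standalone sketch of the behaviour checked here: if the objective is transformed
// as f -> scale * f + f0 then its mean shifts and scales, its variance picks up a
// factor scale^2, and the coefficient of variation sqrt(variance) / mean is unchanged
// by pure scaling but shrinks when a positive level f0 is added to a positive-mean
// objective. Names and numbers are illustrative.
#include <cmath>
#include <iostream>

struct SSummary {
    double s_Mean;
    double s_Variance;
    double coefficientOfVariation() const { return std::sqrt(s_Variance) / s_Mean; }
};

SSummary transformed(const SSummary& base, double scale, double f0) {
    // E[scale * f + f0] = scale * E[f] + f0 and var[scale * f + f0] = scale^2 * var[f].
    return {scale * base.s_Mean + f0, scale * scale * base.s_Variance};
}

int main() {
    SSummary base{50.0, 100.0};
    SSummary shifted{transformed(base, 1.0, 10.0)};
    SSummary scaled{transformed(base, 2.0, 0.0)};
    std::cout << "base CoV    = " << base.coefficientOfVariation() << '\n';
    std::cout << "shifted CoV = " << shifted.coefficientOfVariation() << '\n'; // smaller than base
    std::cout << "scaled CoV  = " << scaled.coefficientOfVariation() << '\n';  // equal to base
    return 0;
}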
+ + TFunctionParamsVec tests{ + {0.0, 100.0, 0.0, 1.0}, {0.0, 100.0, 10.0, 1.0}, {0.0, 100.0, 0.0, 2.0}}; + + TDoubleVec evaluateResults; + TDoubleVecVec evaluate1DResults; + TDoubleVec totalVarianceResults; + TDoubleVec totalCoefficientOfVariationResults; + + for (const auto& test : tests) { + + test::CRandomNumbers rng; + + std::size_t dimension{2}; + double xl{test.s_Xl}; + double xu{test.s_Xu}; + double f0{test.s_F0}; + double scale{test.s_Scale}; + + TDoubleVec coords; + rng.generateUniformSamples(xl, xu, dimension * 20, coords); + + maths::common::CBayesianOptimisation::TDoubleDoublePrVec bb; + for (std::size_t i = 0; i < dimension; ++i) { + bb.emplace_back(xl, xu); + } + + maths::common::CBayesianOptimisation bopt{bb}; + for (std::size_t i = 0; i < 10; ++i) { + TVector x{dimension}; + for (std::size_t j = 0; j < dimension; ++j) { + x(j) = coords[i * dimension + j]; + } + bopt.maximumLikelihoodKernel(); + bopt.add(x, scale * x.norm() + f0, scale * scale * (xu - xl) * (xu - xl) * 0.001); + } + + TVector probe{dimension}; + rng.generateUniformSamples(xl, xu, dimension, coords); + for (std::size_t i = 0; i < dimension; ++i) { + probe(i) = coords[i]; + } + evaluateResults.push_back(bopt.evaluate(probe)); + evaluate1DResults.emplace_back(); + for (std::size_t i = 0; i < dimension; ++i) { + evaluate1DResults.back().push_back( + bopt.evaluate1D(probe[i], static_cast(i))); + } + totalVarianceResults.push_back(bopt.anovaTotalVariance()); + totalCoefficientOfVariationResults.push_back(bopt.anovaTotalCoefficientOfVariation()); + } + + LOG_DEBUG(<< "evaluate = " << core::CContainerPrinter::print(evaluateResults)); + LOG_DEBUG(<< "evaluate1D = " << core::CContainerPrinter::print(evaluate1DResults)); + LOG_DEBUG(<< "totalVariance = " << core::CContainerPrinter::print(totalVarianceResults)); + LOG_DEBUG(<< "totalCoefficientOfVariationResults = " + << core::CContainerPrinter::print(totalCoefficientOfVariationResults)); + + for (std::size_t i = 1; i < tests.size(); ++i) { + double f0{tests[i].s_F0}; + double scale{tests[i].s_Scale}; + BOOST_REQUIRE_CLOSE(evaluateResults[i], scale * evaluateResults[0] + f0, 1e-3); + for (std::size_t j = 0; j < evaluate1DResults[i].size(); ++j) { + BOOST_REQUIRE_CLOSE(evaluate1DResults[i][j], + scale * evaluate1DResults[0][j] + f0, 1e-3); + } + BOOST_REQUIRE_CLOSE(totalVarianceResults[i], + scale * scale * totalVarianceResults[0], 1e-3); + } + BOOST_TEST_REQUIRE(totalCoefficientOfVariationResults[1] < + totalCoefficientOfVariationResults[0]); + BOOST_REQUIRE_CLOSE(totalCoefficientOfVariationResults[2], + totalCoefficientOfVariationResults[0], 1e-3); +} + BOOST_AUTO_TEST_SUITE_END()
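// A standalone sketch of the duplicate handling exercised by testForSingularKernel
// above: rather than adding a repeated point, which would make the kernel matrix
// singular, the stored mean and variance for that point are merged with the new
// observation by pooling first and second moments. The tiny accumulator below is
// illustrative, not the library's CBasicStatistics implementation.
#include <iostream>

struct SMoments {
    double s_Count;
    double s_Mean;
    double s_Variance;
};

// Merge two (count, mean, maximum likelihood variance) summaries.
SMoments merge(const SMoments& lhs, const SMoments& rhs) {
    double count{lhs.s_Count + rhs.s_Count};
    double mean{(lhs.s_Count * lhs.s_Mean + rhs.s_Count * rhs.s_Mean) / count};
    double variance{(lhs.s_Count * (lhs.s_Variance + (lhs.s_Mean - mean) * (lhs.s_Mean - mean)) +
                     rhs.s_Count * (rhs.s_Variance + (rhs.s_Mean - mean) * (rhs.s_Mean - mean))) /
                    count};
    return {count, mean, variance};
}

int main() {
    // An existing observation at some point x and a repeat observation at the same x.
    SMoments stored{1.0, 1.1, 0.0};
    SMoments repeat{1.0, 1.3, 0.0};
    SMoments merged{merge(stored, repeat)};
    std::cout << "mean = " << merged.s_Mean << ", variance = " << merged.s_Variance << '\n';
    return 0;
}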