[ML] Add offset in the MSLE computation (#1200)

valeriy42 · web-flow · commit bf8e5e23cc2b · 2020-05-08T19:46:08.000+02:00
While adding the additional function parameter (the offset) in #1168, I wired it into the constructor of the MSLE loss function, but not into the computation of the objective. This PR fixes that: it basically substitutes log(1+x) with log(offset+x) in many different places. I have marked it as a non-issue since the MSLE loss function has not been released yet.
diff --git a/include/maths/CBoostedTreeLoss.h b/include/maths/CBoostedTreeLoss.h
@@ -76,7 +76,7 @@ class MATHS_EXPORT CArgMinMsleImpl final : public CArgMinLossImpl {
     using TObjective = std::function<double(double)>;
 
 public:
-    CArgMinMsleImpl(double lambda);
+    CArgMinMsleImpl(double lambda, double offset = 1.0);
     std::unique_ptr<CArgMinLossImpl> clone() const override;
     bool nextPass() override;
     void add(const TMemoryMappedFloatVector& prediction, double actual, double weight = 1.0) override;
@@ -122,6 +122,7 @@ class MATHS_EXPORT CArgMinMsleImpl final : public CArgMinLossImpl {
 
 private:
     std::size_t m_CurrentPass = 0;
+    double m_Offset = 1.0;
     TMinMaxAccumulator m_ExpPredictionMinMax;
     TMinMaxAccumulator m_LogActualMinMax;
     TVectorMeanAccumulatorVecVec m_Buckets;
diff --git a/lib/maths/CBoostedTreeLoss.cc b/lib/maths/CBoostedTreeLoss.cc
@@ -388,8 +388,8 @@ CArgMinMultinomialLogisticLossImpl::objectiveGradient() const {
     };
 }
 
-CArgMinMsleImpl::CArgMinMsleImpl(double lambda)
-    : CArgMinLossImpl{lambda}, m_Buckets(MSLE_BUCKET_SIZE) {
+CArgMinMsleImpl::CArgMinMsleImpl(double lambda, double offset)
+    : CArgMinLossImpl{lambda}, m_Offset{offset}, m_Buckets(MSLE_BUCKET_SIZE) {
     for (auto& bucket : m_Buckets) {
         bucket.resize(MSLE_BUCKET_SIZE);
     }
@@ -406,7 +406,7 @@ bool CArgMinMsleImpl::nextPass() {
 
 void CArgMinMsleImpl::add(const TMemoryMappedFloatVector& prediction, double actual, double weight) {
     double expPrediction{CTools::stableExp(prediction[0])};
-    double logActual{CTools::fastLog(1.0 + actual)};
+    double logActual{CTools::fastLog(m_Offset + actual)};
     switch (m_CurrentPass) {
     case 0: {
         m_ExpPredictionMinMax.add(expPrediction);
@@ -415,7 +415,7 @@ void CArgMinMsleImpl::add(const TMemoryMappedFloatVector& prediction, double act
         break;
     }
     case 1: {
-        double logError{logActual - CTools::fastLog(1.0 + expPrediction)};
+        double logError{logActual - CTools::fastLog(m_Offset + expPrediction)};
         TVector example;
         example(MSLE_PREDICTION_INDEX) = expPrediction;
         example(MSLE_ACTUAL_INDEX) = logActual;
@@ -497,7 +497,7 @@ CArgMinMsleImpl::TObjective CArgMinMsleImpl::objective() const {
         if (this->bucketWidth().first == 0.0) {
             // prediction is constant
             double expPrediction{m_ExpPredictionMinMax.max()};
-            double logPrediction{CTools::fastLog(1.0 + expPrediction * weight)};
+            double logPrediction{CTools::fastLog(m_Offset + expPrediction * weight)};
             double meanLogActual{CBasicStatistics::mean(m_MeanLogActual)};
             double meanLogActualSquared{CBasicStatistics::variance(m_MeanLogActual) +
                                         CTools::pow2(meanLogActual)};
@@ -514,7 +514,7 @@ CArgMinMsleImpl::TObjective CArgMinMsleImpl::objective() const {
                         const auto& bucketMean{CBasicStatistics::mean(bucketActual)};
                         double expPrediction{bucketMean(MSLE_PREDICTION_INDEX)};
                         double logActual{bucketMean(MSLE_ACTUAL_INDEX)};
-                        double logPrediction{CTools::fastLog(1.0 + expPrediction * weight)};
+                        double logPrediction{CTools::fastLog(m_Offset + expPrediction * weight)};
                         loss += count * CTools::pow2(logActual - logPrediction);
                         totalCount += count;
                     }
@@ -776,22 +776,22 @@ std::size_t CMsle::numberParameters() const {
 
 double CMsle::value(const TMemoryMappedFloatVector& logPrediction, double actual, double weight) const {
     double prediction{CTools::stableExp(logPrediction(0))};
-    double log1PlusPrediction{CTools::fastLog(1.0 + prediction)};
+    double logOffsetPrediction{CTools::stableLog(m_Offset + prediction)};
     if (actual < 0.0) {
         HANDLE_FATAL(<< "Input error: target value needs to be non-negative to use "
                      << "with MSLE loss, received: " << actual)
     }
-    double log1PlusActual{CTools::fastLog(1.0 + actual)};
-    return weight * CTools::pow2(log1PlusPrediction - log1PlusActual);
+    double logOffsetActual{CTools::stableLog(m_Offset + actual)};
+    return weight * CTools::pow2(logOffsetPrediction - logOffsetActual);
 }
 
 void CMsle::gradient(const TMemoryMappedFloatVector& logPrediction,
                      double actual,
                      TWriter writer,
                      double weight) const {
     double prediction{CTools::stableExp(logPrediction(0))};
-    double log1PlusPrediction{CTools::fastLog(1.0 + prediction)};
-    double log1PlusActual{CTools::fastLog(1.0 + actual)};
-    writer(0, 2.0 * weight * (log1PlusPrediction - log1PlusActual) / (prediction + 1.0));
+    double logOffsetPrediction{CTools::stableLog(m_Offset + prediction)};
+    double logOffsetActual{CTools::stableLog(m_Offset + actual)};
+    writer(0, 2.0 * weight * (logOffsetPrediction - logOffsetActual) / (prediction + m_Offset));
 }
 
@@ -800,12 +800,13 @@ void CMsle::curvature(const TMemoryMappedFloatVector& logPrediction,
                       TWriter writer,
                       double weight) const {
     double prediction{CTools::stableExp(logPrediction(0))};
-    double log1PlusPrediction{CTools::fastLog(1.0 + prediction)};
-    double log1PlusActual{CTools::fastLog(1.0 + actual)};
+    double logOffsetPrediction{CTools::stableLog(m_Offset + prediction)};
+    double logOffsetActual{CTools::stableLog(m_Offset + actual)};
     // Apply L'Hopital's rule in the limit prediction -> actual.
-    writer(0, prediction == actual ? 0.0
-                                   : 2.0 * weight * (log1PlusPrediction - log1PlusActual) /
-                                         ((prediction + 1) * (prediction - actual)));
+    writer(0, prediction == actual
+                  ? 0.0
+                  : 2.0 * weight * (logOffsetPrediction - logOffsetActual) /
+                        ((prediction + m_Offset) * (prediction - actual)));
 }
 
 bool CMsle::isCurvatureConstant() const {