fix reduction modes and misc issues (#7082)
* Fix reduction modes for Hinge Loss

* Fix reductions on Huber loss

* Fix LogPoissonLoss reduction modes & full mode switching

* Update mean pairwise squared error tests in libnd4j to match those
in Java land

Expected values are calculated using the nested-loop method, as
implemented in LossOpValidation.java (see the first sketch after this list).

* Use Gradient Check Mask in Loss OP Validation Tests for MEAN_BY_NONZERO_WEIGHT_COUNT

MEAN_BY_NONZERO_WEIGHT_COUNT is non-differentiable at weight = 0, so those
points have to be masked out (see the second sketch after this list).

* Add reduction mode support to MPWSE Loss

* Fix gradient check numerical issues for softmax losses

* Fix calculation of weights gradient in cases where label smoothing is applied

* All LossOpValidation Tests are passing
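
For reference, the nested-loop method computes mean pairwise squared error by comparing the difference of every pair of predictions against the difference of the corresponding pair of labels. A minimal sketch of that reference computation (the function name `mpwseReference`, the plain `std::vector` inputs, and the normalization by the pair count are illustrative assumptions, not the literal code in LossOpValidation.java):

```cpp
#include <cstddef>
#include <vector>

// Hypothetical reference implementation: for every pair (i, j) within one
// example, compare the difference of predictions with the difference of
// labels and average the squared mismatch over all pairs.
double mpwseReference(const std::vector<double>& predictions,
                      const std::vector<double>& labels) {
    const std::size_t n = predictions.size();
    double sum = 0.0;
    std::size_t pairs = 0;
    for (std::size_t i = 0; i < n; ++i) {
        for (std::size_t j = i + 1; j < n; ++j) {
            const double d = (predictions[i] - predictions[j])
                           - (labels[i] - labels[j]);
            sum += d * d;
            ++pairs;
        }
    }
    return pairs == 0 ? 0.0 : sum / pairs;  // normalization is an assumption
}
```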
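Why the mask is needed: under MEAN_BY_NONZERO_WEIGHT_COUNT the loss is divided by the number of nonzero weights, and that count jumps discontinuously as a weight crosses zero, so a finite-difference gradient check at weight = 0 cannot agree with the analytic gradient. A minimal sketch of such a mask (names are hypothetical, not the actual test helpers):

```cpp
#include <cstddef>
#include <vector>

// Hypothetical gradient-check mask: points with weight exactly zero are
// excluded, since the nonzero-weight count (and hence the loss) is
// discontinuous in the weight at that point.
std::vector<bool> gradCheckMask(const std::vector<double>& weights) {
    std::vector<bool> mask(weights.size());
    for (std::size_t i = 0; i < weights.size(); ++i)
        mask[i] = (weights[i] != 0.0);  // true -> include point in the check
    return mask;
}
```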
treo committed Jan 30, 2019
1 parent 1e0c048 commit 65f2313
Showing 17 changed files with 467 additions and 332 deletions.
13 changes: 10 additions & 3 deletions libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp
@@ -174,16 +174,19 @@ namespace nd4j {
NDArray E = 1.f - z * (*logits);
E.applyScalar(scalar::RELU, 0.0f, &E);
// turn E into gradient mask
E.applyTransform(nd4j::transform::Sign, &E);

dLdp->assign(-z * E);
dLdl->assign(-2.f * (*logits) * E);
NDArray gradientMask(E.getShapeInfo(), block.getWorkspace());
E.applyTransform(nd4j::transform::Sign, &gradientMask);

dLdp->assign(-z * gradientMask);
dLdl->assign(-2.f * (*logits) * gradientMask);

switch (reductionMode) {

case 1: { // 1 - "none" and "weighted_sum", output is scalar and equal to sum of all elements of E array

*dLdp *= *weightsBroad;
*dLdl *= *weightsBroad;

if(weights->isScalar())
dLdw->assign(E.reduceNumber(reduce::Sum));
@@ -205,11 +208,13 @@

if (sum.e<double>(0) == 0.) {
*dLdp = 0.;
*dLdl = 0.;
*dLdw = 0.;
}
else {

*dLdp *= *weightsBroad / sum;
*dLdl *= *weightsBroad / sum;

if(weights->isScalar())
*dLdw = 0.;
@@ -234,6 +239,7 @@

if (numOfNonZeroWeights == 0) {
*dLdp = 0.;
*dLdl = 0.;
*dLdw = 0.;
}
else {
@@ -251,6 +257,7 @@

NDArray temp = *weightsBroad / numOfNonZeroWeightsScalar;
*dLdp *= temp;
*dLdl *= temp;
}
break;
}
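The substantive change in the first hunk above: `E` used to be overwritten in place by its sign, but the unreduced loss values in `E` are still needed further down to compute `dLdw` (e.g. `dLdw->assign(E.reduceNumber(reduce::Sum))`), so the sign is now written into a separate `gradientMask` array. For hinge loss, E = max(0, 1 - z*p), and sign(E) is simply the indicator of the active region. A scalar sketch, assuming labels in {0, 1} are mapped to z = 2*label - 1 (which would account for the factor 2 in `dLdl`):

```cpp
#include <algorithm>

// Scalar sketch of hinge loss and its gradients at one point.
// p is the prediction (logit); label is assumed to be in {0, 1}.
struct HingeGrads { double loss, dLdp, dLdlabel; };

HingeGrads hingePoint(double p, double label) {
    const double z = 2.0 * label - 1.0;           // assumed {0,1} -> {-1,+1} mapping
    const double e = std::max(0.0, 1.0 - z * p);  // unreduced loss; kept intact for dLdw
    const double mask = e > 0.0 ? 1.0 : 0.0;      // sign(relu(.)) = active-region indicator
    return { e,
             -z * mask,            // matches dLdp->assign(-z * gradientMask)
             -2.0 * p * mask };    // dE/dlabel = dE/dz * dz/dlabel = (-p * mask) * 2
}
```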
5 changes: 5 additions & 0 deletions libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp
@@ -201,6 +201,7 @@ DECLARE_SHAPE_FN(huber_loss) {
case 1: { // 1 - "none" and "weighted_sum", output is scalar and equal to sum of all elements of E array

*dLdp *= *weightsBroad;
*dLdl *= *weightsBroad;

if(weights->isScalar())
dLdw->assign(E.reduceNumber(reduce::Sum));
@@ -222,11 +223,13 @@

if (sum.e<double>(0) == 0.) {
*dLdp = 0.;
*dLdl = 0.;
*dLdw = 0.;
}
else {

*dLdp *= *weightsBroad / sum;
*dLdl *= *weightsBroad / sum;

if(weights->isScalar())
*dLdw = 0.;
@@ -251,6 +254,7 @@

if (numOfNonZeroWeights == 0) {
*dLdp = 0.;
*dLdl = 0.;
*dLdw = 0.;
}
else {
@@ -268,6 +272,7 @@

NDArray temp = *weightsBroad / numOfNonZeroWeightsScalar;
*dLdp *= temp;
*dLdl *= temp;
}
break;
}
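Both files now apply the same scaling to `dLdl` that was already applied to `dLdp` in every reduction branch. Schematically, the three weighted branches reduce to the following per-element scaling (a simplified sketch: the mapping of enum names to the op's integer modes, and the omission of the `dLdw` and scalar-weight special cases, are assumptions):

```cpp
#include <cstddef>
#include <vector>

// Simplified per-element gradient scaling for the reduction branches seen in
// the diffs above. grad holds unscaled per-element gradients (dLdp or dLdl),
// w the broadcast weights. Names mirror the branches, not the op's enum.
enum class Reduction { WeightedSum, WeightedMean, MeanByNonzeroCount };

void scaleGrads(std::vector<double>& grad, const std::vector<double>& w,
                Reduction mode) {
    double wSum = 0.0;
    std::size_t nonzero = 0;
    for (double x : w) { wSum += x; if (x != 0.0) ++nonzero; }

    for (std::size_t i = 0; i < grad.size(); ++i) {
        switch (mode) {
            case Reduction::WeightedSum:         // *dLdx *= *weightsBroad
                grad[i] *= w[i];
                break;
            case Reduction::WeightedMean:        // *dLdx *= *weightsBroad / sum
                grad[i] = (wSum == 0.0) ? 0.0 : grad[i] * w[i] / wSum;
                break;
            case Reduction::MeanByNonzeroCount:  // *dLdx *= *weightsBroad / count
                grad[i] = (nonzero == 0) ? 0.0 : grad[i] * w[i] / nonzero;
                break;
        }
    }
}
```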
20 changes: 13 additions & 7 deletions libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp
@@ -54,10 +54,10 @@ namespace ops {


NDArray E(labels->getShapeInfo(), block.getWorkspace());
if (!computeFullLoss)
labels->applyPairwiseTransform(pairwise::LogPoissonLoss, log_predictions, &E, nullptr);
else
if (computeFullLoss)
labels->applyPairwiseTransform(pairwise::LogPoissonLossFull, log_predictions, &E, nullptr);
else
labels->applyPairwiseTransform(pairwise::LogPoissonLoss, log_predictions, &E, nullptr);


// multiply E on weights
@@ -177,22 +177,24 @@ namespace ops {
NDArray E(labels->getShapeInfo(), block.getWorkspace());
if (computeFullLoss) {
labels->applyPairwiseTransform(pairwise::LogPoissonLossFull, log_predictions, &E, nullptr);
dLdl->assign(-(*log_predictions));
} else {
labels->applyPairwiseTransform(pairwise::LogPoissonLoss, log_predictions, &E, nullptr);

NDArray rDiv(labels->getShapeInfo(), block.getWorkspace());
labels->applyScalar(scalar::ReverseDivide, 0.5f, &rDiv);
dLdl->assign(rDiv + labels->transform(transform::Log) + -(*log_predictions));
} else {
labels->applyPairwiseTransform(pairwise::LogPoissonLoss, log_predictions, &E, nullptr);

dLdl->assign(-(*log_predictions));
}

dLdp->assign(log_predictions->transform(transform::Exp) - (*labels));

switch (reductionMode) {

case 1: { // 1 - "none" and "weighted_sum", output is scalar and equal to sum of all elements of E array

*dLdp *= *weightsBroad;
*dLdl *= *weightsBroad;

if(weights->isScalar())
dLdw->assign(E.reduceNumber(reduce::Sum));
@@ -214,11 +216,13 @@

if (sum.e<double>(0) == 0.) {
*dLdp = 0.;
*dLdl = 0.;
*dLdw = 0.;
}
else {

*dLdp *= *weightsBroad / sum;
*dLdl *= *weightsBroad / sum;

if(weights->isScalar())
*dLdw = 0.;
@@ -243,6 +247,7 @@

if (numOfNonZeroWeights == 0) {
*dLdp = 0.;
*dLdl = 0.;
*dLdw = 0.;
}
else {
@@ -260,6 +265,7 @@

NDArray temp = *weightsBroad / numOfNonZeroWeightsScalar;
*dLdp *= temp;
*dLdl *= temp;
}
break;
}
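For orientation, the loss these hunks differentiate is the log Poisson loss exp(c) - z*c for log-prediction c and label z; the full variant adds the Stirling approximation term z*log(z) - z + 0.5*log(2*pi*z). A scalar sketch of the values and gradients the branches above compute (assuming exactly this definition):

```cpp
#include <cmath>

// Scalar sketch: c = log-prediction, z = label.
struct LogPoisson { double loss, dLdc, dLdz; };

LogPoisson logPoissonPoint(double c, double z, bool computeFullLoss) {
    LogPoisson r;
    r.loss = std::exp(c) - z * c;  // basic log Poisson loss
    r.dLdc = std::exp(c) - z;      // matches dLdp->assign(exp(log_predictions) - labels)
    r.dLdz = -c;                   // basic branch: dLdl->assign(-(*log_predictions))
    if (computeFullLoss) {         // add Stirling term z*log(z) - z + 0.5*log(2*pi*z)
        const double pi = 3.141592653589793;
        r.loss += z * std::log(z) - z + 0.5 * std::log(2.0 * pi * z);
        r.dLdz = 0.5 / z + std::log(z) - c;  // matches rDiv + log(labels) - log_predictions
    }
    return r;
}
```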
