[OPS] Minor edits.
eaplatanios committed Nov 30, 2018
1 parent 9547af5 commit 73d56a0
Showing 4 changed files with 18 additions and 33 deletions.
RELEASE.md (2 changes: 1 addition & 1 deletion)
@@ -1,4 +1,4 @@
# 0.4.1 Fixed Precompiled TF Binaries and Some New Features
# 0.4.1 Fixed Precompiled TF Binaries and Added Some New Features

Fixed the precompiled TensorFlow binaries, and also added the following
new features:
@@ -622,21 +622,7 @@ object Gradients {
// TODO: [TYPES] !!! Super hacky. Remove in the future.
implicit val ev: IsNumeric[T] = null

if (gradients.forall(_.isInstanceOf[Output[T]])) {
// This function adds op outputs from potentially different devices.
// We add the tensors of each device separately first, and we then add up the partial results.
val deviceContributions = gradients.groupBy(_.device).toSeq.sortBy(_._1).map {
case (_, outputs) =>
Op.colocateWithForGradient(
Set(gradients.head.op),
gradientUID,
ignoreExisting = true
) {
Math.addN(outputs.map(_.asInstanceOf[Output[T]]))
}
}
Math.addN(deviceContributions)
} else if (gradients.forall(_.isInstanceOf[OutputIndexedSlices[T]])) {
if (gradients.forall(_.isInstanceOf[OutputIndexedSlices[T]])) {
def addNOutputIndexedSlices(
gradients: Seq[OutputIndexedSlices[T]]
): OutputIndexedSlices[T] = {
@@ -659,9 +645,19 @@ object Gradients {
}
addNOutputIndexedSlices(deviceContributions)
} else {
throw new IllegalArgumentException(
"The gradients being aggregated need to be all " +
"of type 'Output' or 'OutputIndexedSlices'.")
// This function adds op outputs from potentially different devices.
// We add the tensors of each device separately first, and we then add up the partial results.
val deviceContributions = gradients.groupBy(_.device).toSeq.sortBy(_._1).map {
case (_, outputs) =>
Op.colocateWithForGradient(
Set(gradients.head.op),
gradientUID,
ignoreExisting = true
) {
Math.addN(outputs.map(_.toOutput.asInstanceOf[Output[T]]))
}
}
Math.addN(deviceContributions)
}
}
}
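
For readers skimming the diff: after this change, `OutputIndexedSlices` gradients are handled in the first branch, and the dense per-device summation becomes the fallback, converting every gradient with `toOutput` before `Math.addN`. The snippet below is a minimal, self-contained sketch of the same group-by-device-then-sum pattern; `Grad`, `DenseGrad`, and plain `Double` addition are hypothetical stand-ins for the library's `Output`/`OutputIndexedSlices` and for `Math.addN` under `Op.colocateWithForGradient`, not the actual API.

```scala
// Illustrative sketch only: mirrors the groupBy(_.device) / addN structure in the hunk above.
object GradientAggregationSketch {
  sealed trait Grad {
    def device: String
    def toDense: DenseGrad
  }

  final case class DenseGrad(device: String, value: Double) extends Grad {
    def toDense: DenseGrad = this
  }

  // Sum the contributions of each device separately first, then sum the partial results.
  def aggregateByDevice(gradients: Seq[Grad]): DenseGrad = {
    val deviceContributions = gradients.groupBy(_.device).toSeq.sortBy(_._1).map {
      case (device, grads) => DenseGrad(device, grads.map(_.toDense.value).sum)
    }
    DenseGrad(deviceContributions.head.device, deviceContributions.map(_.value).sum)
  }
}
```

Summing per device before combining keeps each partial reduction colocated with its inputs, which is the point of the `colocateWithForGradient` block in the real code.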
@@ -59,7 +59,6 @@ import org.platanios.tensorflow.api.ops.variables.Variable
* @param decay Learning rate decay method to use for each update.
* @param beta1 Exponential decay rate for the first moment estimates.
* @param beta2 Exponential decay rate for the second moment estimates.
* @param useNesterov If `true`, Nesterov momentum is used for the updates.
* @param epsilon Small constant used for numerical stability. This epsilon corresponds to
* "epsilon hat" in the Kingma and Ba paper (in the formula just before Section 2.1),
* and not to the epsilon in Algorithm 1 of the paper.
@@ -77,7 +76,6 @@ class AMSGrad protected (
val decay: Schedule[Float] = FixedSchedule[Float](),
val beta1: Float = 0.9f,
val beta2: Float = 0.999f,
val useNesterov: Boolean = false,
val epsilon: Float = 1e-8f,
val useLocking: Boolean = false,
val learningRateSummaryTag: String = null,
@@ -251,14 +249,11 @@ object AMSGrad {
decay: Schedule[Float] = FixedSchedule[Float](),
beta1: Float = 0.9f,
beta2: Float = 0.999f,
useNesterov: Boolean = false,
epsilon: Float = 1e-8f,
useLocking: Boolean = false,
learningRateSummaryTag: String = null,
name: String = "AMSGrad"
): AMSGrad = {
new AMSGrad(
learningRate, decay, beta1, beta2, useNesterov,
epsilon, useLocking, learningRateSummaryTag, name)
new AMSGrad(learningRate, decay, beta1, beta2, epsilon, useLocking, learningRateSummaryTag, name)
}
}
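
With `useNesterov` removed, the factory above takes only the learning rate, decay schedule, `beta1`/`beta2`, `epsilon`, the locking flag, the summary tag, and the name. A hedged construction sketch follows; the package path, the `apply` factory name, and the `Float` learning-rate type are assumptions inferred from the surrounding hunks, not confirmed by this diff.

```scala
// Hedged usage sketch: constructing AMSGrad with the post-change parameter list (no useNesterov).
// Import path and `apply` factory are assumed, not shown in this diff.
import org.platanios.tensorflow.api.ops.training.optimizers.AMSGrad

object AMSGradUsageSketch {
  val optimizer: AMSGrad = AMSGrad(
    learningRate = 0.001f,
    beta1 = 0.9f,
    beta2 = 0.999f,
    epsilon = 1e-8f,
    name = "AMSGrad")
}
```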
@@ -66,7 +66,6 @@ import org.platanios.tensorflow.api.ops.variables.Variable
* @param decay Learning rate decay method to use for each update.
* @param beta1 Exponential decay rate for the first moment estimates.
* @param beta2 Exponential decay rate for the second moment estimates.
* @param useNesterov If `true`, Nesterov momentum is used for the updates.
* @param epsilon Small constant used for numerical stability. This epsilon corresponds to
* "epsilon hat" in the Kingma and Ba paper (in the formula just before Section 2.1),
* and not to the epsilon in Algorithm 1 of the paper.
@@ -84,14 +83,12 @@ class LazyAMSGrad protected (
override val decay: Schedule[Float] = FixedSchedule[Float](),
override val beta1: Float = 0.9f,
override val beta2: Float = 0.999f,
override val useNesterov: Boolean = false,
override val epsilon: Float = 1e-8f,
override val useLocking: Boolean = false,
override val learningRateSummaryTag: String = null,
override val name: String = "LazyAMSGrad"
) extends AMSGrad(
learningRate, decay, beta1, beta2, useNesterov,
epsilon, useLocking, learningRateSummaryTag, name
learningRate, decay, beta1, beta2, epsilon, useLocking, learningRateSummaryTag, name
) {
override val ignoreDuplicateSparseIndices: Boolean = true

@@ -138,14 +135,11 @@ object LazyAMSGrad {
decay: Schedule[Float] = FixedSchedule[Float](),
beta1: Float = 0.9f,
beta2: Float = 0.999f,
useNesterov: Boolean = false,
epsilon: Float = 1e-8f,
useLocking: Boolean = false,
learningRateSummaryTag: String = null,
name: String = "LazyAMSGrad"
): LazyAMSGrad = {
new LazyAMSGrad(
learningRate, decay, beta1, beta2, useNesterov,
epsilon, useLocking, learningRateSummaryTag, name)
new LazyAMSGrad(learningRate, decay, beta1, beta2, epsilon, useLocking, learningRateSummaryTag, name)
}
}
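
`LazyAMSGrad` now forwards the same trimmed parameter list to its parent; since it overrides `ignoreDuplicateSparseIndices` to `true` (see above), it is the variant aimed at sparse gradient updates. The sketch below mirrors the AMSGrad example under the same assumptions about package path and factory name.

```scala
// Hedged usage sketch: the lazy variant shares the updated AMSGrad signature.
import org.platanios.tensorflow.api.ops.training.optimizers.LazyAMSGrad

object LazyAMSGradUsageSketch {
  val optimizer: LazyAMSGrad = LazyAMSGrad(
    learningRate = 0.001f,
    beta1 = 0.9f,
    beta2 = 0.999f,
    epsilon = 1e-8f,
    name = "LazyAMSGrad")
}
```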
