[OPS] Minor edits.
eaplatanios committed Nov 30, 2018
1 parent 9547af5 commit 73d56a0
Showing 4 changed files with 18 additions and 33 deletions.
RELEASE.md (2 changes: 1 addition & 1 deletion)
@@ -1,4 +1,4 @@
# 0.4.1 Fixed Precompiled TF Binaries and Some New Features
# 0.4.1 Fixed Precompiled TF Binaries and Added Some New Features

Fixed the precompiled TensorFlow binaries, and also added the following
new features:
@@ -622,21 +622,7 @@ object Gradients {
// TODO: [TYPES] !!! Super hacky. Remove in the future.
implicit val ev: IsNumeric[T] = null

if (gradients.forall(_.isInstanceOf[Output[T]])) {
// This function adds op outputs from potentially different devices.
// We add the tensors of each device separately first, and we then add up the partial results.
val deviceContributions = gradients.groupBy(_.device).toSeq.sortBy(_._1).map {
case (_, outputs) =>
Op.colocateWithForGradient(
Set(gradients.head.op),
gradientUID,
ignoreExisting = true
) {
Math.addN(outputs.map(_.asInstanceOf[Output[T]]))
}
}
Math.addN(deviceContributions)
} else if (gradients.forall(_.isInstanceOf[OutputIndexedSlices[T]])) {
if (gradients.forall(_.isInstanceOf[OutputIndexedSlices[T]])) {
def addNOutputIndexedSlices(
gradients: Seq[OutputIndexedSlices[T]]
): OutputIndexedSlices[T] = {
@@ -659,9 +645,19 @@ object Gradients {
}
addNOutputIndexedSlices(deviceContributions)
} else {
throw new IllegalArgumentException(
"The gradients being aggregated need to be all " +
"of type 'Output' or 'OutputIndexedSlices'.")
// This function adds op outputs from potentially different devices.
// We add the tensors of each device separately first, and we then add up the partial results.
val deviceContributions = gradients.groupBy(_.device).toSeq.sortBy(_._1).map {
case (_, outputs) =>
Op.colocateWithForGradient(
Set(gradients.head.op),
gradientUID,
ignoreExisting = true
) {
Math.addN(outputs.map(_.toOutput.asInstanceOf[Output[T]]))
}
}
Math.addN(deviceContributions)
}
}
}
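
For readers skimming the diff: after this change, `OutputIndexedSlices` gradients are handled in the first branch, and the dense per-device summation becomes the fallback, converting every gradient with `toOutput` before `Math.addN`. The snippet below is a minimal, self-contained sketch of the same group-by-device-then-sum pattern; `Grad`, `DenseGrad`, and plain `Double` addition are hypothetical stand-ins for the library's `Output`/`OutputIndexedSlices` and for `Math.addN` under `Op.colocateWithForGradient`, not the actual API.

```scala
// Illustrative sketch only: mirrors the groupBy(_.device) / addN structure in the hunk above.
object GradientAggregationSketch {
  sealed trait Grad {
    def device: String
    def toDense: DenseGrad
  }

  final case class DenseGrad(device: String, value: Double) extends Grad {
    def toDense: DenseGrad = this
  }

  // Sum the contributions of each device separately first, then sum the partial results.
  def aggregateByDevice(gradients: Seq[Grad]): DenseGrad = {
    val deviceContributions = gradients.groupBy(_.device).toSeq.sortBy(_._1).map {
      case (device, grads) => DenseGrad(device, grads.map(_.toDense.value).sum)
    }
    DenseGrad(deviceContributions.head.device, deviceContributions.map(_.value).sum)
  }
}
```

Summing per device before combining keeps each partial reduction colocated with its inputs, which is the point of the `colocateWithForGradient` block in the real code.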
@@ -59,7 +59,6 @@ import org.platanios.tensorflow.api.ops.variables.Variable
* @param decay Learning rate decay method to use for each update.
* @param beta1 Exponential decay rate for the first moment estimates.
* @param beta2 Exponential decay rate for the second moment estimates.
* @param useNesterov If `true`, Nesterov momentum is used for the updates.
* @param epsilon Small constant used for numerical stability. This epsilon corresponds to
* "epsilon hat" in the Kingma and Ba paper (in the formula just before Section 2.1),
* and not to the epsilon in Algorithm 1 of the paper.
@@ -77,7 +76,6 @@ class AMSGrad protected (
val decay: Schedule[Float] = FixedSchedule[Float](),
val beta1: Float = 0.9f,
val beta2: Float = 0.999f,
val useNesterov: Boolean = false,
val epsilon: Float = 1e-8f,
val useLocking: Boolean = false,
val learningRateSummaryTag: String = null,
@@ -251,14 +249,11 @@ object AMSGrad {
decay: Schedule[Float] = FixedSchedule[Float](),
beta1: Float = 0.9f,
beta2: Float = 0.999f,
useNesterov: Boolean = false,
epsilon: Float = 1e-8f,
useLocking: Boolean = false,
learningRateSummaryTag: String = null,
name: String = "AMSGrad"
): AMSGrad = {
new AMSGrad(
learningRate, decay, beta1, beta2, useNesterov,
epsilon, useLocking, learningRateSummaryTag, name)
new AMSGrad(learningRate, decay, beta1, beta2, epsilon, useLocking, learningRateSummaryTag, name)
}
}
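
With `useNesterov` removed, the factory above takes only the learning rate, decay schedule, `beta1`/`beta2`, `epsilon`, the locking flag, the summary tag, and the name. A hedged construction sketch follows; the package path, the `apply` factory name, and the `Float` learning-rate type are assumptions inferred from the surrounding hunks, not confirmed by this diff.

```scala
// Hedged usage sketch: constructing AMSGrad with the post-change parameter list (no useNesterov).
// Import path and `apply` factory are assumed, not shown in this diff.
import org.platanios.tensorflow.api.ops.training.optimizers.AMSGrad

object AMSGradUsageSketch {
  val optimizer: AMSGrad = AMSGrad(
    learningRate = 0.001f,
    beta1 = 0.9f,
    beta2 = 0.999f,
    epsilon = 1e-8f,
    name = "AMSGrad")
}
```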
@@ -66,7 +66,6 @@ import org.platanios.tensorflow.api.ops.variables.Variable
* @param decay Learning rate decay method to use for each update.
* @param beta1 Exponential decay rate for the first moment estimates.
* @param beta2 Exponential decay rate for the second moment estimates.
* @param useNesterov If `true`, Nesterov momentum is used for the updates.
* @param epsilon Small constant used for numerical stability. This epsilon corresponds to
* "epsilon hat" in the Kingma and Ba paper (in the formula just before Section 2.1),
* and not to the epsilon in Algorithm 1 of the paper.
@@ -84,14 +83,12 @@ class LazyAMSGrad protected (
override val decay: Schedule[Float] = FixedSchedule[Float](),
override val beta1: Float = 0.9f,
override val beta2: Float = 0.999f,
override val useNesterov: Boolean = false,
override val epsilon: Float = 1e-8f,
override val useLocking: Boolean = false,
override val learningRateSummaryTag: String = null,
override val name: String = "LazyAMSGrad"
) extends AMSGrad(
learningRate, decay, beta1, beta2, useNesterov,
epsilon, useLocking, learningRateSummaryTag, name
learningRate, decay, beta1, beta2, epsilon, useLocking, learningRateSummaryTag, name
) {
override val ignoreDuplicateSparseIndices: Boolean = true

@@ -138,14 +135,11 @@ object LazyAMSGrad {
decay: Schedule[Float] = FixedSchedule[Float](),
beta1: Float = 0.9f,
beta2: Float = 0.999f,
useNesterov: Boolean = false,
epsilon: Float = 1e-8f,
useLocking: Boolean = false,
learningRateSummaryTag: String = null,
name: String = "LazyAMSGrad"
): LazyAMSGrad = {
new LazyAMSGrad(
learningRate, decay, beta1, beta2, useNesterov,
epsilon, useLocking, learningRateSummaryTag, name)
new LazyAMSGrad(learningRate, decay, beta1, beta2, epsilon, useLocking, learningRateSummaryTag, name)
}
}
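
`LazyAMSGrad` now forwards the same trimmed parameter list to its parent; since it overrides `ignoreDuplicateSparseIndices` to `true` (see above), it is the variant aimed at sparse gradient updates. The sketch below mirrors the AMSGrad example under the same assumptions about package path and factory name.

```scala
// Hedged usage sketch: the lazy variant shares the updated AMSGrad signature.
import org.platanios.tensorflow.api.ops.training.optimizers.LazyAMSGrad

object LazyAMSGradUsageSketch {
  val optimizer: LazyAMSGrad = LazyAMSGrad(
    learningRate = 0.001f,
    beta1 = 0.9f,
    beta2 = 0.999f,
    epsilon = 1e-8f,
    name = "LazyAMSGrad")
}
```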
