diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Add.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Add.scala index e200ab7100b..120c6ad8827 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Add.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Add.scala @@ -86,10 +86,6 @@ class Add[T: ClassTag](val inputSize: Int } } - override def zeroGradParameters(): Unit = { - gradBias.zero() - } - override def clearState() : this.type = { super.clearState() ones.set() diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BatchNormalization.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BatchNormalization.scala index f7406d81ccb..1b9fe69eb9c 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BatchNormalization.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BatchNormalization.scala @@ -142,13 +142,6 @@ class BatchNormalization[T: ClassTag]( this } - override def zeroGradParameters(): Unit = { - if (affine) { - gradWeight.zero() - gradBias.zero() - } - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { if (affine) { (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias)) diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BiRecurrent.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BiRecurrent.scala index 91a811ed6d3..45c298aa8c6 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BiRecurrent.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BiRecurrent.scala @@ -94,26 +94,6 @@ class BiRecurrent[T : ClassTag] ( */ override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = birnn.parameters() - override def updateParameters(learningRate: T): Unit = birnn.updateParameters(learningRate) - - /** - * If the module has parameters, this will zero the accumulation of the gradients with respect - * to these parameters. Otherwise, it does nothing. 
- */ - override def zeroGradParameters(): Unit = birnn.zeroGradParameters() - - override def training(): BiRecurrent.this.type = { - super.training() - birnn.training() - this - } - - override def evaluate(): BiRecurrent.this.type = { - super.evaluate() - birnn.evaluate() - this - } - override def canEqual(other: Any): Boolean = other.isInstanceOf[BiRecurrent[T]] diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Bilinear.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Bilinear.scala index a98bbf8c4c1..70f56775546 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Bilinear.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Bilinear.scala @@ -194,11 +194,6 @@ class Bilinear[T: ClassTag]( } } - override def zeroGradParameters(): Unit = { - gradWeight.zero() - gradBias.zero() - } - override def clearState(): this.type = { super.clearState() buff1.set() @@ -214,15 +209,6 @@ class Bilinear[T: ClassTag]( } } - override def getParametersTable(): Table = { - if (null == bias) { - T(getName() -> T("weight" -> weight, "gradWeight" -> gradWeight)) - } else { - T(getName() -> T("weight" -> weight, "bias" -> bias, - "gradWeight" -> gradWeight, "gradBias" -> gradBias)) - } - } - override def toString(): String = { s"${getPrintName}($inputSize1, $inputSize2, $outputSize, $biasRes)" } diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BinaryTreeLSTM.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BinaryTreeLSTM.scala index 3453d67a2a5..e7a0d69cafd 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BinaryTreeLSTM.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BinaryTreeLSTM.scala @@ -368,11 +368,6 @@ class BinaryTreeLSTM[T: ClassTag]( (cp ++ lp, cg ++ lg) } - override def updateParameters(learningRate: T): Unit = { - composer.updateParameters(learningRate) - leafModule.updateParameters(learningRate) - } - override def getParametersTable(): Table = { val pt = T() val t1 = composer.getParametersTable() @@ -382,11 +377,6 @@ class BinaryTreeLSTM[T: ClassTag]( pt } - override def zeroGradParameters(): Unit = { - composer.zeroGradParameters() - leafModule.zeroGradParameters() - } - override def reset(): Unit = { composer.reset() leafModule.reset() diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/CAdd.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/CAdd.scala index 777d23c9705..920d6416bd9 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/CAdd.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/CAdd.scala @@ -141,14 +141,6 @@ class CAdd[T: ClassTag]( } } - override def updateParameters(learningRate: T): Unit = { - bias.map(gradBias, (a, b) => ev.minus(a, ev.times(learningRate, b))) - } - - override def zeroGradParameters(): Unit = { - gradBias.zero() - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { (Array(this.bias), Array(this.gradBias)) } diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/CMul.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/CMul.scala index 7bd1aa3a491..66f698aa632 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/CMul.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/CMul.scala @@ -163,22 +163,10 @@ class CMul[T: ClassTag]( } } - override def updateParameters(learningRate: T): Unit = { - weight.map(gradWeight, (a, b) => ev.minus(a, ev.times(learningRate, b))) - } - - override def zeroGradParameters(): Unit = { - gradWeight.zero() - } - 
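Note: the zeroGradParameters/updateParameters overrides removed above (Add, BatchNormalization, BiRecurrent, Bilinear, BinaryTreeLSTM, CAdd, CMul, ...) are superseded by the default implementation added to AbstractModule later in this patch, which zeroes every gradient tensor returned by parameters(). A minimal usage sketch, assuming the Linear and Tensor APIs as they appear elsewhere in this diff; shapes and values are illustrative only:

    import com.intel.analytics.bigdl.nn.Linear
    import com.intel.analytics.bigdl.tensor.Tensor

    // Linear no longer overrides zeroGradParameters; the AbstractModule default
    // zips parameters()._1 with parameters()._2 and zeroes each gradient tensor.
    val linear = Linear[Float](10, 5)
    val input = Tensor[Float](2, 10).rand()
    val gradOutput = Tensor[Float](2, 5).rand()
    linear.forward(input)
    linear.backward(input, gradOutput)   // accumulates gradWeight and gradBias
    linear.zeroGradParameters()          // resets both gradients to zero
    println(linear.parameters()._2.map(_.sum()).mkString(", "))  // expect 0.0, 0.0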
override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { (Array(this.weight), Array(this.gradWeight)) } - override def getParametersTable(): Table = { - T(getName() -> T("weight" -> weight, "gradWeight" -> gradWeight)) - } - override def clearState(): this.type = { super.clearState() _repeat.set() diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Cell.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Cell.scala index b847d0bcd1e..8151990a984 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Cell.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Cell.scala @@ -190,11 +190,6 @@ abstract class Cell[T : ClassTag]( gradInput } - override def updateParameters(learningRate: T): Unit = { - cell.updateParameters(learningRate) - if (includePreTopology) preTopology.updateParameters(learningRate) - } - private def initAddTimes(): Unit = { val cellTimes = cell.getTimes if (subModules == null || subModules.length < cellTimes.length) { @@ -264,11 +259,6 @@ abstract class Cell[T : ClassTag]( cell.resetTimes } - override def zeroGradParameters(): Unit = { - cell.zeroGradParameters() - if (includePreTopology) preTopology.zeroGradParameters() - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { val _cell = if (includePreTopology) { Sequential().add(preTopology).add(cell) diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Concat.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Concat.scala index 819bfee85bb..d4f85133c9e 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Concat.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Concat.scala @@ -257,18 +257,6 @@ class Concat[T: ClassTag](val dimension: Int)( this.gradInput } - // Todo: this is different from torch accUpdateGradParameters - override def updateParameters(learningRate: T): Unit = { - var offset = 1 - var i = 0 - while (i < this.modules.length) { - val currentOutput = this.modules(i).output.asInstanceOf[Tensor[T]] - this.modules(i).updateParameters(learningRate) - i += 1 - offset += currentOutput.size(dimension) - } - } - override def equals(obj: Any): Boolean = { if (!super.equals(obj)) { return false diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Container.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Container.scala index 9b485c77360..bbb6f267626 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Container.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Container.scala @@ -51,31 +51,23 @@ abstract class Container[A <: Activity : ClassTag, modules.filter(!_.isCompatibleWithTorch()).length <= 0 } - override def zeroGradParameters(): Unit = { - modules.foreach(_.zeroGradParameters()) - } - - override def updateParameters(learningRate: T): Unit = { - modules.foreach(_.updateParameters(learningRate)) - } - override def reset(): Unit = { modules.foreach(_.reset()) } - override def training(): this.type = { + final override def training(): this.type = { train = true modules.foreach(_.training()) this } - override def evaluate(): this.type = { + final override def evaluate(): this.type = { train = false modules.foreach(_.evaluate()) this } - override def checkEngineType(): this.type = { + final override def checkEngineType(): this.type = { modules.foreach(_.checkEngineType()) this } diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Cosine.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Cosine.scala index 
527589831eb..df89e66fb74 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Cosine.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Cosine.scala @@ -176,18 +176,10 @@ class Cosine[T: ClassTag](val inputSize : Int, val outputSize : Int)( } } - override def zeroGradParameters(): Unit = { - gradWeight.zero() - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { (Array(this.weight), Array(this.gradWeight)) } - override def getParametersTable(): Table = { - T(getName() -> T("weight" -> weight, "gradWeight" -> gradWeight)) - } - override def toString(): String = { s"${getPrintName}($inputSize, $outputSize)" } diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Euclidean.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Euclidean.scala index bd752c05628..9362eff5a14 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Euclidean.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Euclidean.scala @@ -149,10 +149,6 @@ class Euclidean[T: ClassTag](val inputSize: Int, val outputSize: Int, s"${getPrintName}($inputSize, $outputSize)" } - override def zeroGradParameters(): Unit = { - gradWeight.zero() - } - override def clearState() : this.type = { super.clearState() inputBuffer.set() @@ -168,10 +164,6 @@ class Euclidean[T: ClassTag](val inputSize: Int, val outputSize: Int, (Array(this.weight), Array(this.gradWeight)) } - override def getParametersTable(): Table = { - T(getName() -> T("weight" -> weight, "gradWeight" -> gradWeight)) - } - override def canEqual(other: Any): Boolean = other.isInstanceOf[Euclidean[T]] override def equals(other: Any): Boolean = other match { diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Linear.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Linear.scala index a6834c3adf1..9fb0ca04617 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Linear.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Linear.scala @@ -170,20 +170,6 @@ class Linear[T: ClassTag]( } } - override def updateParameters(learningRate: T): Unit = { - weight.add(ev.negative(learningRate), gradWeight) - if (withBias) bias.add(ev.negative(learningRate), gradBias) - } - - override def zeroGradParameters(): Unit = { - gradWeight.resize(outputSize, inputSize) - gradWeight.zero() - if (withBias) { - gradBias.resize(outputSize) - gradBias.zero() - } - } - override def clearState() : this.type = { super.clearState() addBuffer.set() @@ -198,15 +184,6 @@ class Linear[T: ClassTag]( } } - override def getParametersTable(): Table = { - if (null == bias) { - T(getName() -> T("weight" -> weight, "gradWeight" -> gradWeight)) - } else { - T(getName() -> T("weight" -> weight, "bias" -> bias, - "gradWeight" -> gradWeight, "gradBias" -> gradBias)) - } - } - override def equals(obj: Any): Boolean = { if (!super.equals(obj)) { diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LocallyConnected1D.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LocallyConnected1D.scala index 80255ad26ba..98d19c35896 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LocallyConnected1D.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LocallyConnected1D.scala @@ -380,36 +380,11 @@ class LocallyConnected1D[T: ClassTag](val nInputFrame: Int, } } - override def updateParameters(learningRate: T): Unit - - = { - weight.map(gradWeight, (a, b) => ev.minus(a, ev.times(learningRate, b))) - bias.map(gradBias, (a, b) => ev.minus(a, 
ev.times(learningRate, b))) - } - - override def zeroGradParameters(): Unit - - = { - gradWeight.zero() - gradBias.zero() - } - - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) - - = { + override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias)) } - override def getParametersTable(): Table - - = { - T(getName() -> T("weight" -> weight, "bias" -> bias, - "gradWeight" -> gradWeight, "gradBias" -> gradBias)) - } - - override def equals(obj: Any): Boolean - - = { + override def equals(obj: Any): Boolean = { if (!super.equals(obj)) { return false } @@ -432,9 +407,7 @@ class LocallyConnected1D[T: ClassTag](val nInputFrame: Int, gradBias == other.gradBias } - override def hashCode(): Int - - = { + override def hashCode(): Int = { val seed = 37 var hash = super.hashCode() hash = hash * seed + inputFrameSize.hashCode() @@ -449,16 +422,12 @@ class LocallyConnected1D[T: ClassTag](val nInputFrame: Int, hash } - override def clearState(): this.type - - = { + override def clearState(): this.type = { super.clearState() this } - override def toString(): String - - = { + override def toString(): String = { s"nn.TemporalConvolution($inputFrameSize -> $outputFrameSize, $kernelW x $strideW)" } } diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LocallyConnected2D.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LocallyConnected2D.scala index 00aa122498f..ee784da0aac 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LocallyConnected2D.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LocallyConnected2D.scala @@ -452,20 +452,6 @@ class LocallyConnected2D[T: ClassTag]( } } - override def updateParameters(learningRate: T): Unit = { - weight.map(gradWeight, (a, b) => ev.minus(a, ev.times(learningRate, b))) - if (withBias) { - bias.map(gradBias, (a, b) => ev.minus(a, ev.times(learningRate, b))) - } - } - - override def zeroGradParameters(): Unit = { - gradWeight.zero() - if (withBias) { - gradBias.zero() - } - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { if (withBias) { (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias)) @@ -474,16 +460,6 @@ class LocallyConnected2D[T: ClassTag]( } } - override def getParametersTable(): Table = { - if (withBias) { - T(getName() -> T("weight" -> weight, "bias" -> bias, - "gradWeight" -> gradWeight, "gradBias" -> gradBias)) - } else { - T(getName() -> T("weight" -> weight, - "gradWeight" -> gradWeight)) - } - } - override def equals(obj: Any): Boolean = { if (!super.equals(obj)) { diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LookupTable.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LookupTable.scala index 8566c636e67..ef2d59f17c0 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LookupTable.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LookupTable.scala @@ -257,18 +257,10 @@ class LookupTable[T: ClassTag] } } - override def zeroGradParameters(): Unit = { - gradWeight.zero() - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { (Array(this.weight), Array(this.gradWeight)) } - override def getParametersTable(): Table = { - T(getName() -> T("weight" -> weight, "gradWeight" -> gradWeight)) - } - override def clearState() : this.type = { super.clearState() inputBuffer.set() diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LookupTableSparse.scala 
b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LookupTableSparse.scala index c8f9c2d135e..49c38b11f89 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LookupTableSparse.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/LookupTableSparse.scala @@ -217,18 +217,10 @@ class LookupTableSparse[T: ClassTag]( } } - override def zeroGradParameters(): Unit = { - gradWeight.zero() - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { (Array(this.weight), Array(this.gradWeight)) } - override def getParametersTable(): Table = { - T(getName() -> T("weight" -> weight, "gradWeight" -> gradWeight)) - } - override def clearState() : this.type = { super.clearState() diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/MapTable.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/MapTable.scala index dc8f61f2b98..c00f42e22f3 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/MapTable.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/MapTable.scala @@ -104,19 +104,6 @@ class MapTable[T: ClassTag]( throw new IllegalArgumentException("Can not transform Container MapTable to graph") } - override def zeroGradParameters(): Unit = { - if (module != null) { - module.zeroGradParameters() - } - } - - - override def updateParameters(learningRate: T): Unit = { - if (module != null) { - module.updateParameters(learningRate) - } - } - override def toString(): String = { val tab = " " val extlast = " " diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Maxout.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Maxout.scala index 5d723b2e144..3169fb41e53 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Maxout.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Maxout.scala @@ -72,10 +72,6 @@ class Maxout[T: ClassTag](val inputSize: Int, val outputSize: Int, val maxoutNum layer.accGradParameters(input, gradOutput) } - override def zeroGradParameters(): Unit = { - layer.zeroGradParameters() - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { layer.parameters() } diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Mul.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Mul.scala index 1545fdcf7e2..93c91a6fbbf 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Mul.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Mul.scala @@ -63,18 +63,10 @@ class Mul[T: ClassTag](implicit ev: TensorNumeric[T]) } } - override def zeroGradParameters(): Unit = { - gradWeight.zero() - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { (Array(this.weight), Array(this.gradWeight)) } - override def getParametersTable(): Table = { - T(getName() -> T("weight" -> weight, "gradWeight" -> gradWeight)) - } - override def canEqual(other: Any): Boolean = other.isInstanceOf[Mul[T]] override def equals(other: Any): Boolean = other match { diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/MultiRNNCell.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/MultiRNNCell.scala index 138ffd72574..2e2490b9157 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/MultiRNNCell.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/MultiRNNCell.scala @@ -165,10 +165,6 @@ class MultiRNNCell[T : ClassTag](val cells: Array[Cell[T]])(implicit ev: TensorN gradInput } - override def zeroGradParameters(): Unit = { - cells.foreach(_.zeroGradParameters()) - } - override def 
reset(): Unit = { cells.foreach(_.reset()) } diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/NormalizeScale.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/NormalizeScale.scala index 34892300df5..8c8970b6e84 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/NormalizeScale.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/NormalizeScale.scala @@ -65,10 +65,6 @@ class NormalizeScale[T: ClassTag](val p: Double, val eps: Double = 1e-10, override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { (Array(cmul.weight), Array(cmul.gradWeight)) } - - override def getParametersTable(): Table = { - T(getName() -> T("weight" -> cmul.weight, "gradWeight" -> cmul.gradWeight)) - } } object NormalizeScale { diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/PReLU.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/PReLU.scala index 63a246e73ce..e9be2848faf 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/PReLU.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/PReLU.scala @@ -283,18 +283,10 @@ class PReLU[T: ClassTag]( } } - override def zeroGradParameters(): Unit = { - gradWeight.zero() - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { (Array(this.weight), Array(this.gradWeight)) } - override def getParametersTable(): Table = { - T(getName() -> T("weight" -> weight, "gradWeight" -> gradWeight)) - } - override def toString(): String = { s"${getPrintName}($nOutputPlane)" } diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Scale.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Scale.scala index d5100e36668..b842582d3fe 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Scale.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Scale.scala @@ -73,11 +73,6 @@ class Scale[T: ClassTag](val size: Array[Int]) Array(cmul.parameters()._2(0), cadd.parameters()._2(0))) } - override def getParametersTable(): Table = { - T(getName() -> T("weight" -> cmul.weight, "bias" -> cadd.bias, - "gradWeight" -> cmul.gradWeight, "gradBias" -> cadd.gradBias)) - } - override def toString: String = "nn.Scale" } diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialConvolution.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialConvolution.scala index b46f0cfdbee..72a01f800f7 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialConvolution.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialConvolution.scala @@ -519,20 +519,6 @@ class SpatialConvolution[T: ClassTag]( } } - override def updateParameters(learningRate: T): Unit = { - weight.map(gradWeight, (a, b) => ev.minus(a, ev.times(learningRate, b))) - if (withBias) { - bias.map(gradBias, (a, b) => ev.minus(a, ev.times(learningRate, b))) - } - } - - override def zeroGradParameters(): Unit = { - gradWeight.zero() - if (withBias) { - gradBias.zero() - } - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { if (withBias) { (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias)) @@ -541,16 +527,6 @@ class SpatialConvolution[T: ClassTag]( } } - override def getParametersTable(): Table = { - if (withBias) { - T(getName() -> T("weight" -> weight, "bias" -> bias, - "gradWeight" -> gradWeight, "gradBias" -> gradBias)) - } else { - T(getName() -> T("weight" -> weight, - "gradWeight" -> gradWeight)) - } - } - override def equals(obj: Any): Boolean = { if (!super.equals(obj)) { diff 
--git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialConvolutionMap.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialConvolutionMap.scala index 9ca1f511af6..cc5cb56ee6e 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialConvolutionMap.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialConvolutionMap.scala @@ -293,20 +293,10 @@ class SpatialConvolutionMap[T: ClassTag]( (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias)) } - override def getParametersTable(): Table = { - T(getName() -> T("weight" -> weight, "bias" -> bias, - "gradWeight" -> gradWeight, "gradBias" -> gradBias)) - } - def decayParameters(decay: T): Unit = { weight.apply1(ev.minus(_, decay)) bias.apply1(ev.minus(_, decay)) } - - override def zeroGradParameters(): Unit = { - gradWeight.zero() - gradBias.zero() - } } object SpatialConvolutionMap { diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialDilatedConvolution.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialDilatedConvolution.scala index 0b1fd3e5967..3ed7280014e 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialDilatedConvolution.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialDilatedConvolution.scala @@ -477,25 +477,10 @@ class SpatialDilatedConvolution[T: ClassTag]( } } - override def updateParameters(learningRate: T): Unit = { - weight.map(gradWeight, (a, b) => ev.minus(a, ev.times(learningRate, b))) - bias.map(gradBias, (a, b) => ev.minus(a, ev.times(learningRate, b))) - } - - override def zeroGradParameters(): Unit = { - gradWeight.zero() - gradBias.zero() - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias)) } - override def getParametersTable(): Table = { - T(getName() -> T("weight" -> weight, "bias" -> bias, - "gradWeight" -> gradWeight, "gradBias" -> gradBias)) - } - override def equals(obj: Any): Boolean = { if (!super.equals(obj)) { diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialFullConvolution.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialFullConvolution.scala index 56841a29710..d06fcce9e6b 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialFullConvolution.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialFullConvolution.scala @@ -670,18 +670,6 @@ class SpatialFullConvolution[T: ClassTag]( } } - override def updateParameters(learningRate: T): Unit = { - weight.map(gradWeight, (a, b) => ev.minus(a, ev.times(learningRate, b))) - bias.map(gradBias, (a, b) => ev.minus(a, ev.times(learningRate, b))) - } - - override def zeroGradParameters(): Unit = { - gradWeight.zero() - if(!noBias) { - gradBias.zero() - } - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { if (null == bias) { (Array(this.weight), Array(this.gradWeight)) @@ -690,15 +678,6 @@ class SpatialFullConvolution[T: ClassTag]( } } - override def getParametersTable(): Table = { - if (null == bias) { - T(getName() -> T("weight" -> weight, "gradWeight" -> gradWeight)) - } else { - T(getName() -> T("weight" -> weight, "bias" -> bias, - "gradWeight" -> gradWeight, "gradBias" -> gradBias)) - } - } - override def clearState() : this.type = { super.clearState() columns.set() diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialSeperableConvolution.scala 
b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialSeperableConvolution.scala index eac34049a3d..6731e0b4be7 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialSeperableConvolution.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/SpatialSeperableConvolution.scala @@ -15,11 +15,14 @@ */ package com.intel.analytics.bigdl.nn -import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, DataFormat} +import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity, DataFormat} import com.intel.analytics.bigdl.optim.Regularizer +import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, BigDLModule} import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.Shape +import com.intel.analytics.bigdl.utils.serializer.converters.DataConverter +import com.intel.analytics.bigdl.utils.serializer.{DeserializeContext, ModuleSerializable, ModuleSerializer, SerializeContext} import scala.reflect.ClassTag @@ -211,15 +214,10 @@ class SpatialSeperableConvolution[T: ClassTag]( if (initBias == null) bias.zero() zeroGradParameters() } - - override def zeroGradParameters(): Unit = { - depthWeight.zero() - pointWeight.zero() - bias.zero() - } } -object SpatialSeperableConvolution { +object SpatialSeperableConvolution extends ModuleSerializable { + def apply[T: ClassTag](nInputChannel: Int, nOutputChannel: Int, depthMultiplier: Int, kW: Int, kH: Int, sW: Int = 1, sH: Int = 1, pW: Int = 0, pH: Int = 0, hasBias: Boolean = true, dataFormat: DataFormat = DataFormat.NCHW, @@ -270,4 +268,54 @@ object SpatialSeperableConvolution { in += 1 } } + + override def doLoadModule[T: ClassTag](context: DeserializeContext) + (implicit ev: TensorNumeric[T]) : AbstractModule[Activity, Activity, T] = { + val attrMap = context.bigdlModule.getAttrMap + val ssc = super.doLoadModule(context).asInstanceOf[SpatialSeperableConvolution[T]] + val weights = ssc.parameters()._1 + val (depthWeight, pointWeight, bias) = (weights(0), weights(1), weights(2)) + + val depthWeightLoad = DataConverter. + getAttributeValue(context, attrMap.get("depthWeight")). + asInstanceOf[Tensor[T]] + depthWeight.copy(depthWeightLoad) + + val pointWeightLoad = DataConverter. + getAttributeValue(context, attrMap.get("pointWeight")). + asInstanceOf[Tensor[T]] + pointWeight.copy(pointWeightLoad) + + val biasLoad = DataConverter. + getAttributeValue(context, attrMap.get("bias")). 
+ asInstanceOf[Tensor[T]] + bias.copy(biasLoad) + + ssc.asInstanceOf[AbstractModule[Activity, Activity, T]] + } + override def doSerializeModule[T: ClassTag](context: SerializeContext[T], + sreluBuilder : BigDLModule.Builder) + (implicit ev: TensorNumeric[T]) : Unit = { + + super.doSerializeModule(context, sreluBuilder) + + val ssc = context.moduleData.module.asInstanceOf[SpatialSeperableConvolution[T]] + val weights = ssc.parameters()._1 + val (depthWeight, pointWeight, bias) = (weights(0), weights(1), weights(2)) + + val depthWeightBuilder = AttrValue.newBuilder + DataConverter.setAttributeValue(context, depthWeightBuilder, + depthWeight, ModuleSerializer.tensorType) + sreluBuilder.putAttr("depthWeight", depthWeightBuilder.build) + + val pointWeightBuilder = AttrValue.newBuilder + DataConverter.setAttributeValue(context, pointWeightBuilder, + pointWeight, ModuleSerializer.tensorType) + sreluBuilder.putAttr("pointWeight", pointWeightBuilder.build) + + val biasBuilder = AttrValue.newBuilder + DataConverter.setAttributeValue(context, biasBuilder, + bias, ModuleSerializer.tensorType) + sreluBuilder.putAttr("bias", biasBuilder.build) + } } diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/TemporalConvolution.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/TemporalConvolution.scala index 31fc71260f7..ff197789f7f 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/TemporalConvolution.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/TemporalConvolution.scala @@ -391,25 +391,10 @@ class TemporalConvolution[T: ClassTag]( } } - override def updateParameters(learningRate: T): Unit = { - weight.map(gradWeight, (a, b) => ev.minus(a, ev.times(learningRate, b))) - bias.map(gradBias, (a, b) => ev.minus(a, ev.times(learningRate, b))) - } - - override def zeroGradParameters(): Unit = { - gradWeight.zero() - gradBias.zero() - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias)) } - override def getParametersTable(): Table = { - T(getName() -> T("weight" -> weight, "bias" -> bias, - "gradWeight" -> gradWeight, "gradBias" -> gradBias)) - } - override def equals(obj: Any): Boolean = { if (!super.equals(obj)) { return false diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/TimeDistributed.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/TimeDistributed.scala index aad277c2118..272978113d7 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/TimeDistributed.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/TimeDistributed.scala @@ -151,16 +151,6 @@ class TimeDistributed[T : ClassTag] (val layer: AbstractModule[Tensor[T], Tensor gradInput } - /** - * If the module has parameters, this will zero the accumulation of the gradients with respect - * to these parameters. Otherwise, it does nothing. - */ - override def zeroGradParameters(): Unit = { - layer.zeroGradParameters() - } - - override def updateParameters(learningRate: T): Unit = layer.updateParameters(learningRate) - override def reset(): Unit = layer.reset() override def training(): TimeDistributed.this.type = { @@ -210,14 +200,6 @@ class TimeDistributed[T : ClassTag] (val layer: AbstractModule[Tensor[T], Tensor */ override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = layer.parameters() - /** - * This method compact all parameters and gradients of the model into two tensors. 
So it's easier - * to use optim method - * - * @return - */ - override def getParameters(): (Tensor[T], Tensor[T]) = layer.getParameters() - /** * This method will return a table indicating the name and corresponding parameters. * @return Table diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/VolumetricConvolution.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/VolumetricConvolution.scala index 3c973b62119..94fc5f37dd6 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/VolumetricConvolution.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/VolumetricConvolution.scala @@ -97,18 +97,6 @@ class VolumetricConvolution[T: ClassTag]( this } - override def updateParameters(learningRate: T): Unit = { - weight.map(gradWeight, (a, b) => ev.minus(a, ev.times(learningRate, b))) - if (withBias) { - bias.map(gradBias, (a, b) => ev.minus(a, ev.times(learningRate, b))) - } - } - - override def zeroGradParameters(): Unit = { - gradWeight.zero() - if (withBias) gradBias.zero() - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { if (withBias) { (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias)) @@ -117,16 +105,6 @@ class VolumetricConvolution[T: ClassTag]( } } - override def getParametersTable(): Table = { - if (withBias) { - T(getName() -> T("weight" -> weight, "bias" -> bias, - "gradWeight" -> gradWeight, "gradBias" -> gradBias)) - } else { - T(getName() -> T("weight" -> weight, - "gradWeight" -> gradWeight)) - } - } - override def computeOutputShape(inputShape: Shape): Shape = { val input = inputShape.toSingle().toArray require(input.length == 5, diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/VolumetricFullConvolution.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/VolumetricFullConvolution.scala index 00e5867abf1..2bfe4ef1b57 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/VolumetricFullConvolution.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/VolumetricFullConvolution.scala @@ -724,18 +724,6 @@ class VolumetricFullConvolution[T: ClassTag]( } } - override def updateParameters(learningRate: T): Unit = { - weight.map(gradWeight, (a, b) => ev.minus(a, ev.times(learningRate, b))) - bias.map(gradBias, (a, b) => ev.minus(a, ev.times(learningRate, b))) - } - - override def zeroGradParameters(): Unit = { - gradWeight.zero() - if(!noBias) { - gradBias.zero() - } - } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { if (null == bias) { (Array(this.weight), Array(this.gradWeight)) @@ -744,15 +732,6 @@ class VolumetricFullConvolution[T: ClassTag]( } } - override def getParametersTable(): Table = { - if (null == bias) { - T(getName() -> T("weight" -> weight, "gradWeight" -> gradWeight)) - } else { - T(getName() -> T("weight" -> weight, "bias" -> bias, - "gradWeight" -> gradWeight, "gradBias" -> gradBias)) - } - } - override def clearState() : this.type = { super.clearState() columns.set() diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/abstractnn/AbstractModule.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/abstractnn/AbstractModule.scala index 97e33c8f122..80a869db92b 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/abstractnn/AbstractModule.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/abstractnn/AbstractModule.scala @@ -42,7 +42,7 @@ import scala.reflect.ClassTag * [[TensorModule]] is an abstract sub-class of [[AbstractModule]], whose * input and output type 
both are [[Tensor]]. * - * @tparam T The numeric type in this module, usually which are [[Float]] or [[Double]] + * @tparam T The numeric type in this module parameters */ abstract class TensorModule[T: ClassTag] (implicit ev: TensorNumeric[T]) extends AbstractModule[Tensor[T], Tensor[T], T] @@ -53,16 +53,13 @@ abstract class TensorModule[T: ClassTag] * * @tparam A Input data type * @tparam B Output data type - * @tparam T Numeric type of parameter(e.g. weight, bias). Only support float/double now + * @tparam T The numeric type in this module parameters. */ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, T: ClassTag]( implicit ev: TensorNumeric[T]) extends Serializable with InferShape{ - private var namePostfix = Integer.toHexString(java.util.UUID.randomUUID().hashCode()) - - def getNamePostfix : String = namePostfix + // ================================= Public APIs ============================================= - def setNamePostfix(namePostfix : String) : Unit = this.namePostfix = namePostfix /** * The cached output. So we don't compute it again when need it @@ -74,24 +71,17 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, */ var gradInput: A = Activity.allocate[A, T]() - /** - * The scale of gradient weight and gradient bias - * before gradParameters being accumulated. - */ - protected var scaleW: Double = 1.0 - protected var scaleB: Double = 1.0 - /** * Get the scale of gradientWeight */ - def getScaleW(): Double = { + final def getScaleW(): Double = { scaleW } /** * Get the scale of gradientBias */ - def getScaleB(): Double = { + final def getScaleB(): Double = { scaleB } @@ -138,18 +128,11 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, this } - private[nn] def allocateAs(dest: Activity): Activity = dest match { - case tensor: Tensor[T] => Tensor[T]() - case table: Table => T() - case _ => throw new IllegalArgumentException("Activity only support tensor and table now") - } - /** - * The name of the module + * Whether user set a name to the module before + * @return */ - private var name : String = null - - def hasName: Boolean = name != null + final def hasName: Boolean = name != null /** * Set the module name @@ -157,7 +140,7 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * @param name * @return */ - def setName(name : String) : this.type = { + final def setName(name : String) : this.type = { this.name = name this } @@ -167,7 +150,7 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * * @return */ - def getName() : String = { + final def getName() : String = { if (this.name == null) { s"${this.getClass.getSimpleName}${namePostfix}" } else { @@ -175,34 +158,25 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, } } - protected def getPrintName(): String = { - val postfix = if (name == null) { - namePostfix - } else { - name - } - s"${this.getClass.getSimpleName}[${postfix}]" - - } - override def toString(): String = getPrintName - protected var forwardTime = 0L - - protected var backwardTime = 0L - + /** + * Get the forward/backward cost time for the module or its submodules + * @return + */ def getTimes(): Array[(AbstractModule[_ <: Activity, _ <: Activity, T], Long, Long)] = { Array((this, forwardTime, backwardTime)) } + /** + * Reset the forward/backward record time for the module or its submodules + * @return + */ def resetTimes(): Unit = { forwardTime = 0 backwardTime = 0 } - private 
var scaleWCache: Double = scaleW - private var scaleBCache: Double = scaleB - /** * freeze the module, * i.e. their parameters(weight/bias, if exists) are not changed in training process @@ -332,27 +306,14 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * If the module has parameters, this will zero the accumulation of the gradients with respect * to these parameters. Otherwise, it does nothing. */ - def zeroGradParameters(): Unit = { + final def zeroGradParameters(): Unit = { if (parameters() != null) { - parameters()._2.foreach(grad => { - grad.zero() - }) + parameters()._1.zip(parameters()._2)foreach{ case (weight, grad) => + grad.resizeAs(weight).zero() + } } } - def updateParameters(learningRate: T): Unit = {} - - /** - * This method compact all parameters and gradients of the model into two tensors. So it's easier - * to use optim method - * - * @return - */ - def getParameters(): (Tensor[T], Tensor[T]) = { - val (weightParameters, gradParameters) = this.parameters() - (Module.flatten[T](weightParameters), Module.flatten[T](gradParameters)) - } - /** * This function returns two arrays. One for the weights and the other the gradients * Custom modules should override this function if they have parameters @@ -379,12 +340,12 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * * @return this */ - def setExtraParameter(extraParam: Array[Tensor[T]]): this.type = { + final def setExtraParameter(extraParam: Array[Tensor[T]]): this.type = { val currentExtraParam = this.getExtraParameter() if (extraParam != null && currentExtraParam != null) { require(extraParam.length == currentExtraParam.length, "state's length doesn't match, excepted:" + - s"${currentExtraParam.length}, but got ${extraParam.length}") + s"${currentExtraParam.length}, but got ${extraParam.length}") var i = 0 while (i < extraParam.length) { currentExtraParam(i).copy(extraParam(i)) @@ -402,64 +363,106 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, /** * This function returns a table contains ModuleName, the parameter names and parameter value * in this module. + * * The result table is a structure of Table(ModuleName -> Table(ParameterName -> ParameterValue)), * and the type is Table[String, Table[String, Tensor[T]]]. * * For example, get the weight of a module named conv1: * table[Table]("conv1")[Tensor[T]]("weight"). * - * Custom modules should override this function if they have parameters. + * The names of the parameters follow such convention: + * + * 1. If there's one parameter, the parameter is named as "weight", the gradient is named as + * "gradWeight" + * + * 2. If there're two parameters, the first parameter is named as "weight", the first gradient is + * named as "gradWeight"; the second parameter is named as "bias", the seconcd gradient is + * named as "gradBias" + * + * 3. If there're more parameters, the weight is named as "weight" with a seq number as suffix, + * the gradient is named as "gradient" with a seq number as suffix + * + * Custom modules should override this function the default impl if the convention doesn't meet + * the requirement. 
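Note: with the default getParametersTable() defined below, the per-layer overrides removed earlier in this diff (Bilinear, CMul, Cosine, Linear, LookupTable, ...) are no longer needed; any layer whose parameters follow the weight/bias convention gets a correctly named table for free. A lookup sketch, assuming the SpatialConvolution constructor used elsewhere in this patch; the layer name "conv1" is illustrative:

    import com.intel.analytics.bigdl.nn.SpatialConvolution
    import com.intel.analytics.bigdl.tensor.Tensor
    import com.intel.analytics.bigdl.utils.Table

    val conv = SpatialConvolution[Float](3, 16, 3, 3).setName("conv1")
    val params = conv.getParametersTable()
    // Two parameters, so the default table uses the weight/bias naming convention.
    val weight = params[Table]("conv1")[Tensor[Float]]("weight")
    val gradBias = params[Table]("conv1")[Tensor[Float]]("gradBias")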
* * @return Table */ - def getParametersTable(): Table = null + def getParametersTable(): Table = { + val params = parameters() + if (params == null) return null + val (weights, gradients) = params + require(gradients.length == weights.length, "weight number is not equal to grad number") + + if (weights.length == 1) { + T(getName() -> T("weight" -> weights(0), "gradWeight" -> gradients(0))) + } else if (weights.length == 2) { + T(getName() -> T("weight" -> weights(0), "bias" -> weights(1), + "gradWeight" -> gradients(0), "gradBias" -> gradients(1))) + } else { + val result = T() + weights.zip(gradients).zipWithIndex.map { case ((w, g), i) => + result(s"weight$i") = w + result(s"gradient$i") = g + } + T(getName() -> result) + } + } /** - * Module status. It is useful for modules like dropout/batch normalization + * Set the module to training mode + * @return */ - protected var train: Boolean = true - def training(): this.type = { train = true this } + /** + * Set the module to evaluate mode + * @return + */ def evaluate(): this.type = { train = false this } + /** + * Check if the model is in training mode + * @return + */ final def isTraining(): Boolean = { this.train } + /** + * Reset module parameters, which is re-initialize the parameter with given initMethod + */ def reset(): Unit = {} - - protected var line = "\n" - - def setLine(line: String): this.type = { + /** + * Set the line separator when print the module + * @param line + * @return + */ + final def setLine(line: String): this.type = { this.line = line this } - private val engineType: EngineType = Engine.getEngineType() - /** - * get execution engine type + * Clone the model + * @return */ - def checkEngineType(): this.type = { - if (engineType != Engine.getEngineType()) { - throw new Error("Module's EngineType doesn't march global EngineType") - } - this - } - - def cloneModule(): AbstractModule[A, B, T] = { + final def cloneModule(): AbstractModule[A, B, T] = { SerializationUtils.clone(this) } - def clone(deepCopy : Boolean): AbstractModule[A, B, T] = { + /** + * Clone the module, deep or shallow copy + * @param deepCopy + * @return + */ + final def clone(deepCopy : Boolean): AbstractModule[A, B, T] = { val moduleData = ModuleData[T](this. 
asInstanceOf[AbstractModule[Activity, Activity, T]], Seq[String](), Seq[String]()) val storages = new mutable.HashMap[Int, Any]() @@ -478,64 +481,6 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, copy } - - private def setWeightAndBias(copy : AbstractModule[A, B, T], deepCopy : Boolean): Unit = { - val parameterTable = this.getParametersTable - val copiedModuleParamTable = copy.getParametersTable - if (parameterTable != null) { - require(copiedModuleParamTable != null, "cloned module should have params") - parameterTable.foreach { - case (name: String, params: Table) => - require(copiedModuleParamTable.get(name) != None, s"cloned module should have for $name") - setLayerWeightAndBias(params, - copiedModuleParamTable.get(name).get.asInstanceOf[Table], deepCopy) - } - } - } - - private def setLayerWeightAndBias(params : Table, - copyParams : Table, deepCopy : Boolean): Unit = { - params.foreach(param => { - copyParam(params, copyParams, deepCopy, param._1.toString) - }) - } - - private def copyParam(params : Table, copyParams : Table, - deepCopy : Boolean, paraName : String) : Unit = { - if (params.contains(paraName)) { - // this is for quantization tensors where the weight might be an array - if (params.get(paraName).get - .isInstanceOf[Array[Tensor[T]]]) { - val copies = copyParams.get(paraName).get - .asInstanceOf[Array[Tensor[T]]] - val origins = params.get(paraName).get - .asInstanceOf[Array[Tensor[T]]] - var i = 0 - while (i < copies.length) { - copyTensor(origins(i), copies(i), deepCopy) - i += 1 - } - } else { - // For normal layers, their params are just tensors - copyTensor(params.get(paraName).get.asInstanceOf[Tensor[T]], - copyParams.get(paraName).get.asInstanceOf[Tensor[T]], deepCopy) - } - } - } - - private def copyTensor(t1 : Tensor[T], t2 : Tensor[T], deepCopy : Boolean) = { - if (t2.isInstanceOf[QuantizedTensor[_]]) { - t2.asInstanceOf[QuantizedTensor[_]].release() - } - if (deepCopy) { - t2.copy(t1) - } else { - t2.set(t1) - } - } - - def canEqual(other: Any): Boolean = other.isInstanceOf[AbstractModule[A, B, T]] - override def equals(other: Any): Boolean = other match { case that: AbstractModule[A, B, T] => (that canEqual this) && @@ -560,8 +505,8 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * @param overWrite if overwrite * @return self */ - @deprecated("please use recommended saveModule(path, overWrite)") - def save(path : String, overWrite: Boolean = false) : this.type = { + @deprecated("please use recommended saveModule(path, overWrite)", "0.3.0") + final def save(path : String, overWrite: Boolean = false) : this.type = { this.clearState() File.save(this, path, overWrite) this @@ -576,8 +521,8 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * @param overWrite if overwrite * @return self */ - def saveModule(path : String, weightPath : String = null, - overWrite: Boolean = false) : this.type = { + final def saveModule(path : String, weightPath : String = null, + overWrite: Boolean = false) : this.type = { this.clearState() ModulePersister.saveToFile(path, weightPath, this, overWrite) this @@ -591,30 +536,52 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * @param overWrite if overwrite * @return self */ - def saveDefinition(path : String, overWrite: Boolean = false) : this.type = { + final def saveDefinition(path : String, overWrite: Boolean = false) : this.type = { this.clearState() ModulePersister.saveModelDefinitionToFile(path, 
this, overWrite) this } - def saveTorch(path : String, overWrite: Boolean = false) : this.type = { + /** + * Save this module to path in torch7 readable format + * @param path + * @param overWrite + * @return + */ + final def saveTorch(path : String, overWrite: Boolean = false) : this.type = { this.clearState() File.saveTorch(this, path, TYPE_MODULE, overWrite) this } - def saveCaffe(prototxtPath: String, modelPath: String, + /** + * Save this module to path in caffe readable format + * @param prototxtPath + * @param modelPath + * @param useV2 + * @param overwrite + * @return + */ + final def saveCaffe(prototxtPath: String, modelPath: String, useV2 : Boolean = true, overwrite : Boolean = false) : this.type = { this.clearState() CaffePersister.persist[T](prototxtPath, modelPath, this, useV2, overwrite) this } - def saveTF( - inputs : Seq[(String, Seq[Int])], - path: String, - byteOrder: ByteOrder = ByteOrder.LITTLE_ENDIAN, - dataFormat: TensorflowDataFormat = TensorflowDataFormat.NHWC): this.type = { + /** + * Save this module to path in tensorflow readable format + * @param inputs + * @param path + * @param byteOrder + * @param dataFormat + * @return + */ + final def saveTF( + inputs : Seq[(String, Seq[Int])], + path: String, + byteOrder: ByteOrder = ByteOrder.LITTLE_ENDIAN, + dataFormat: TensorflowDataFormat = TensorflowDataFormat.NHWC): this.type = { require(this.isInstanceOf[Graph[T]], "only Graph container can be saved as Tensorflow model") this.clearState() val inTrainMode = train @@ -629,9 +596,10 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, } /** - * @return Float or Double + * Get numeric type of module parameters + * @return */ - def getNumericType(): TensorDataType = { + final def getNumericType(): TensorDataType = { ev.getType() } @@ -642,9 +610,9 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * if -1, default is 4 * partitionNumber of datatset * @param shareBuffer whether to share same memory for each batch predict results */ - def predict(dataset: RDD[Sample[T]], - batchSize: Int = -1, - shareBuffer: Boolean = false): RDD[Activity] = { + final def predict(dataset: RDD[Sample[T]], + batchSize: Int = -1, + shareBuffer: Boolean = false): RDD[Activity] = { Predictor(this).predict(dataset, batchSize, shareBuffer) } @@ -654,7 +622,7 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * @param batchSize total batchSize for all partitions. 
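Note: predict and predictClass are now final wrappers around Predictor. A distributed-prediction sketch, assuming an RDD[Sample[Float]] named samples has been prepared elsewhere and using the default batch size (4 * partition number):

    import com.intel.analytics.bigdl.Module
    import com.intel.analytics.bigdl.dataset.Sample
    import org.apache.spark.rdd.RDD

    // `model` is any trained module; `samples` is an existing RDD[Sample[Float]].
    def classify(model: Module[Float], samples: RDD[Sample[Float]]): RDD[Int] =
      model.predictClass(samples)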
* if -1, default is 4 * partitionNumber of dataset */ - def predictClass(dataset: RDD[Sample[T]], batchSize: Int = -1): RDD[Int] = { + final def predictClass(dataset: RDD[Sample[T]], batchSize: Int = -1): RDD[Int] = { Predictor(this).predictClass(dataset, batchSize) } @@ -672,7 +640,7 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * @param featurePaddingParam featurePaddingParam if the inputs have variant size * @return */ - def predictImage(imageFrame: ImageFrame, + final def predictImage(imageFrame: ImageFrame, outputLayer: String = null, shareBuffer: Boolean = false, batchPerPartition: Int = 4, @@ -693,7 +661,7 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * @param newWeights array of weights and bias * @return */ - def setWeightsBias(newWeights: Array[Tensor[T]]): this.type = { + final def setWeightsBias(newWeights: Array[Tensor[T]]): this.type = { require(parameters() != null, "this layer does not have weight/bias") require(parameters()._1.length == newWeights.length, "the number of input weight/bias is not consistant with " + @@ -703,9 +671,9 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, val weights = parameters()._1 for(i <- newWeights.indices) { // TODO: enable this checking as we don't respect shape right now. -// require(weights(i).size().deep == newWeights(i).size().deep, -// s"Mismatch shape, ${weights(i).size().mkString(",")}" + -// s" vs ${newWeights(i).size().mkString(",")} ") + // require(weights(i).size().deep == newWeights(i).size().deep, + // s"Mismatch shape, ${weights(i).size().mkString(",")}" + + // s" vs ${newWeights(i).size().mkString(",")} ") weights(i).copy(newWeights(i)) } this @@ -716,7 +684,7 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * @return array of weights and bias * */ - def getWeightsBias(): Array[Tensor[T]] = { + final def getWeightsBias(): Array[Tensor[T]] = { if (parameters() != null) { parameters()._1 } else { @@ -729,7 +697,7 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * @param path file to save * @param overWrite whether to overwrite or not */ - def saveWeights(path: String, overWrite: Boolean): Unit = { + final def saveWeights(path: String, overWrite: Boolean): Unit = { val parameterTable = getParametersTable() val weightsBiasTable = T() parameterTable.foreach { @@ -754,7 +722,7 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * if not, only load existing pretrained weights and bias * @return current module */ - def loadWeights(weightPath: String, matchAll: Boolean = true): this.type = { + final def loadWeights(weightPath: String, matchAll: Boolean = true): this.type = { val srcParameter = File.load[Table](weightPath) val targetParameter = getParametersTable() copyWeights(targetParameter, srcParameter, matchAll) @@ -767,32 +735,13 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * @param matchAll whether to match all layers' weights and bias, * @return current module */ - def loadModelWeights(srcModel: Module[Float], matchAll: Boolean = true): this.type = { + final def loadModelWeights(srcModel: Module[Float], matchAll: Boolean = true): this.type = { val srcParameter = srcModel.getParametersTable() val targetParameter = getParametersTable() copyWeights(targetParameter, srcParameter, matchAll) this } - private def copyWeights(target: Table, src: Table, matchAll: Boolean): Unit = { - 
target.foreach { - case (name: String, targetParams: Table) => - if (src.contains(name)) { - val srcParams = src[Table](name) - if (srcParams.contains("weight")) { - val w = srcParams[Tensor[T]]("weight") - targetParams[Tensor[T]]("weight").resizeAs(w).copy(w) - } - if (srcParams.contains("bias")) { - val b = srcParams[Tensor[T]]("bias") - targetParams[Tensor[T]]("bias").resizeAs(b).copy(b) - } - } else { - if (matchAll) new Exception(s"module $name cannot find corresponding weight bias") - } - } - } - /** * Build graph: some other modules point to current module * @param nodes upstream module nodes @@ -825,7 +774,8 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * @param nodesWithIndex upstream module nodes and the output tensor index. The start index is 1. * @return node containing current module */ - def inputs(first: (ModuleNode[T], Int), nodesWithIndex : (ModuleNode[T], Int)*): ModuleNode[T] = { + def inputs(first: (ModuleNode[T], Int), nodesWithIndex : (ModuleNode[T], Int)*) + : ModuleNode[T] = { val curNode = new ModuleNode[T](this) first._1.add(curNode, Edge(first._2)) nodesWithIndex.foreach(nodeWithIndex => { @@ -834,6 +784,17 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, curNode } + /** + * Generate graph module with start nodes + * @param startNodes + * @return + */ + def toGraph(startNodes: ModuleNode[T]*): Graph[T] = { + val starts = if (startNodes.isEmpty) Array(Input[T]()) else startNodes.toArray + val endNodes = this.getEndNodes(starts) + Graph(starts, endNodes) + } + /** * Find a module with given name. If there is no module with given name, it will return None. If * there are multiple modules with the given name, an exception will be thrown. @@ -849,30 +810,206 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, } /** - * use ValidationMethod to evaluate module + * use ValidationMethod to evaluate module on the given rdd dataset * @param dataset dataset for test * @param vMethods validation methods * @param batchSize total batchsize of all partitions, * optional param and default 4 * partitionNum of dataset * @return */ - def evaluate(dataset: RDD[Sample[T]], - vMethods: Array[ValidationMethod[T]], - batchSize: Option[Int] = None): Array[(ValidationResult, ValidationMethod[T])] = { + final def evaluate( + dataset: RDD[Sample[T]], + vMethods: Array[ValidationMethod[T]], + batchSize: Option[Int] = None + ): Array[(ValidationResult, ValidationMethod[T])] = { Evaluator(this).test(dataset, vMethods, batchSize) } - - def evaluate(dataSet: LocalDataSet[MiniBatch[T]], - vMethods: Array[ValidationMethod[T]] - ): Array[(ValidationResult, ValidationMethod[T])] = { + /** + * use ValidationMethod to evaluate module on the given local dataset + * @param dataSet + * @param vMethods + * @return + */ + final def evaluate( + dataSet: LocalDataSet[MiniBatch[T]], + vMethods: Array[ValidationMethod[T]] + ): Array[(ValidationResult, ValidationMethod[T])] = { Validator(this, dataSet).test(vMethods) } - def quantize(): Module[T] = { + /** + * Quantize this module, which reduces the precision of the parameter. Get a higher speed with a + * little accuracy cost. 
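Note: toGraph, added above, turns a module into a static Graph by generating end nodes from the given start nodes (an Input node is created when none are supplied). A usage sketch, assuming Sequential, Linear and ReLU behave as elsewhere in this codebase:

    import com.intel.analytics.bigdl.nn.{Graph, Linear, ReLU, Sequential}

    val seq = Sequential[Float]()
      .add(Linear[Float](10, 20))
      .add(ReLU[Float]())
      .add(Linear[Float](20, 5))
    // With no start nodes supplied, toGraph prepends an Input node automatically.
    val graph: Graph[Float] = seq.toGraph()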
+ * @return + */ + final def quantize(): Module[T] = { Quantization.quantize(this) } + // ================================= Internal APIs =========================================== + + private var namePostfix = Integer.toHexString(java.util.UUID.randomUUID().hashCode()) + + final private[bigdl] def getNamePostfix : String = namePostfix + + final private[bigdl] def setNamePostfix(namePostfix : String) : Unit = + this.namePostfix = namePostfix + + /** + * The scale of gradient weight and gradient bias + * before gradParameters are accumulated. + */ + protected var scaleW: Double = 1.0 + protected var scaleB: Double = 1.0 + + private[nn] final def allocateAs(dest: Activity): Activity = dest match { + case tensor: Tensor[T] => Tensor[T]() + case table: Table => T() + case _ => throw new IllegalArgumentException("Activity only supports tensor and table now") + } + + /** + * The name of the module + */ + private var name : String = null + + protected final def getPrintName(): String = { + val postfix = if (name == null) { + namePostfix + } else { + name + } + s"${this.getClass.getSimpleName}[${postfix}]" + + } + + protected var forwardTime = 0L + + protected var backwardTime = 0L + + private var scaleWCache: Double = scaleW + private var scaleBCache: Double = scaleB + + /** + * This function returns two tensors. One for the flattened trainable parameters flatParameters + * and another for the gradients of the energy with respect to the trainable parameters flatGradParameters. + * + * Custom modules should not override this function. They should instead override parameters(...) + * which is, in turn, called by the present function. + * + * This function will go over all the weights and gradWeights and make them views onto a single + * tensor (one for weights and one for gradWeights). + * + * @return + */ + final private[bigdl] def getParameters(): (Tensor[T], Tensor[T]) = { + val (weightParameters, gradParameters) = this.parameters() + + // If some gradParameters are not allocated storage, allocate it + require(weightParameters.size == gradParameters.size, + "the number of weights and the number of gradients do not match") + weightParameters.zip(gradParameters).foreach{ case(w, g) => g.resizeAs(w)} + (Module.flatten[T](weightParameters), Module.flatten[T](gradParameters)) + } + + /** + * Module status.
It is useful for modules like dropout/batch normalization */ + protected var train: Boolean = true + + + protected var line = "\n" + + + private val engineType: EngineType = Engine.getEngineType() + + /** + * Check that the module's engine type matches the global engine type + */ + private[bigdl] def checkEngineType(): this.type = { + if (engineType != Engine.getEngineType()) { + throw new Error("Module's EngineType doesn't match global EngineType") + } + this + } + + final private def setWeightAndBias(copy : AbstractModule[A, B, T], deepCopy : Boolean): Unit = { + val parameterTable = this.getParametersTable + val copiedModuleParamTable = copy.getParametersTable + if (parameterTable != null) { + require(copiedModuleParamTable != null, "cloned module should have params") + parameterTable.foreach { + case (name: String, params: Table) => + require(copiedModuleParamTable.get(name) != None, s"cloned module should have params for $name") + setLayerWeightAndBias(params, + copiedModuleParamTable.get(name).get.asInstanceOf[Table], deepCopy) + } + } + } + + final private def setLayerWeightAndBias(params : Table, + copyParams : Table, deepCopy : Boolean): Unit = { + params.foreach(param => { + copyParam(params, copyParams, deepCopy, param._1.toString) + }) + } + + final private def copyParam(params : Table, copyParams : Table, + deepCopy : Boolean, paraName : String) : Unit = { + if (params.contains(paraName)) { + // this is for quantization tensors where the weight might be an array + if (params.get(paraName).get + .isInstanceOf[Array[Tensor[T]]]) { + val copies = copyParams.get(paraName).get + .asInstanceOf[Array[Tensor[T]]] + val origins = params.get(paraName).get + .asInstanceOf[Array[Tensor[T]]] + var i = 0 + while (i < copies.length) { + copyTensor(origins(i), copies(i), deepCopy) + i += 1 + } + } else { + // For normal layers, their params are just tensors + copyTensor(params.get(paraName).get.asInstanceOf[Tensor[T]], + copyParams.get(paraName).get.asInstanceOf[Tensor[T]], deepCopy) + } + } + } + + final private def copyTensor(t1 : Tensor[T], t2 : Tensor[T], deepCopy : Boolean) = { + if (t2.isInstanceOf[QuantizedTensor[_]]) { + t2.asInstanceOf[QuantizedTensor[_]].release() + } + if (deepCopy) { + t2.copy(t1) + } else { + t2.set(t1) + } + } + + final private def copyWeights(target: Table, src: Table, matchAll: Boolean): Unit = { + target.foreach { + case (name: String, targetParams: Table) => + if (src.contains(name)) { + val srcParams = src[Table](name) + if (srcParams.contains("weight")) { + val w = srcParams[Tensor[T]]("weight") + targetParams[Tensor[T]]("weight").resizeAs(w).copy(w) + } + if (srcParams.contains("bias")) { + val b = srcParams[Tensor[T]]("bias") + targetParams[Tensor[T]]("bias").resizeAs(b).copy(b) + } + } else { + if (matchAll) throw new Exception(s"module $name cannot find corresponding weight bias") + } + } + } + + private[bigdl] def canEqual(other: Any): Boolean = other.isInstanceOf[AbstractModule[A, B, T]] + /** * Generate end nodes of current module with start nodes @@ -884,23 +1021,12 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, endNodes } - /** - * Generate graph module with start nodes - * @param startNodes - * @return - */ - def toGraph(startNodes: ModuleNode[T]*): Graph[T] = { - val starts = if (startNodes.isEmpty) Array(Input[T]()) else startNodes.toArray - val endNodes = this.getEndNodes(starts) - Graph(starts, endNodes) - } - /** * Return classTag numerics for module serialization.
If your module contains multiple classtag * in the constructor, you should override this method * @return */ - def getClassTagNumerics() : (Array[ClassTag[_]], Array[TensorNumeric[_]]) = { + private[bigdl] def getClassTagNumerics() : (Array[ClassTag[_]], Array[TensorNumeric[_]]) = { (Array(scala.reflect.classTag[T]), Array(ev)) } } diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/quantized/Linear.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/quantized/Linear.scala index ada8628ca5d..8063337378d 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/quantized/Linear.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/quantized/Linear.scala @@ -106,10 +106,6 @@ private[bigdl] class Linear[T: ClassTag]( (Array(weight, bias), Array(empty, empty)) } - override def getParametersTable(): Table = { - T(getName() -> T("weight" -> weight, "bias" -> bias)) - } - override def equals(obj: Any): Boolean = { if (!super.equals(obj)) { return false diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/quantized/SpatialConvolution.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/quantized/SpatialConvolution.scala index f5602168658..b44473d5d16 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/quantized/SpatialConvolution.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/quantized/SpatialConvolution.scala @@ -216,10 +216,6 @@ private[bigdl] class SpatialConvolution[T: ClassTag]( (weight :+ bias, Array.fill[Tensor[T]](nGroup + 1)(empty)) // nGroup's weight + bias } - override def getParametersTable(): Table = { - T(getName() -> T("weight" -> weight, "bias" -> bias)) - } - override def equals(obj: Any): Boolean = { if (!super.equals(obj)) { return false diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/python/api/PythonBigDL.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/python/api/PythonBigDL.scala index 41aecc72fc7..f3557e6f809 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/python/api/PythonBigDL.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/python/api/PythonBigDL.scala @@ -2316,7 +2316,8 @@ class PythonBigDL[T: ClassTag](implicit ev: TensorNumeric[T]) extends Serializab } def updateParameters(model: AbstractModule[Activity, Activity, T], lr: Double): Unit = { - model.updateParameters(ev.fromType(lr)) + val (w, g) = model.getParameters() + w.add(ev.negative(ev.fromType(lr)), g) } def uniform(a: Double, b: Double, size: JList[Int]): JTensor = { diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/utils/serializer/ModuleSerializer.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/utils/serializer/ModuleSerializer.scala index 920f892cb9e..341feb0fc95 100644 --- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/utils/serializer/ModuleSerializer.scala +++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/utils/serializer/ModuleSerializer.scala @@ -213,6 +213,8 @@ object ModuleSerializer extends ModuleSerializable{ registerModule("com.intel.analytics.bigdl.nn.ops.RandomUniform", RandomUniformOps) registerModule("com.intel.analytics.bigdl.nn.tf.StrideSlice", StrideSlice) registerModule("com.intel.analytics.bigdl.nn.MultiRNNCell", MultiRNNCell) + registerModule("com.intel.analytics.bigdl.nn.SpatialSeperableConvolution", + SpatialSeperableConvolution) } } diff --git a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/CAddSpec.scala b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/CAddSpec.scala index 
779fe82856d..30d97f98609 100644 --- a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/CAddSpec.scala +++ b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/CAddSpec.scala @@ -47,7 +47,8 @@ class CAddSpec extends FlatSpec with Matchers { val gradCriterion = criterion.backward (pred, y) mlp.zeroGradParameters () mlp.backward (x, gradCriterion) - mlp.updateParameters (learningRate) + val (weight, grad) = mlp.getParameters() + weight.add(-learningRate, grad) err } diff --git a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/DynamicGraphSpec.scala b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/DynamicGraphSpec.scala index 87e06da4638..28b5e761036 100644 --- a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/DynamicGraphSpec.scala +++ b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/DynamicGraphSpec.scala @@ -1063,7 +1063,6 @@ class DynamicGraphSpec extends FlatSpec with Matchers { model.zeroGradParameters() println("output1: \n", model.forward(input)) model.backward(input, gradOutput) - model.updateParameters(1) println("fc2 weight \n", fc2.element.parameters()._1(0)) @@ -1073,7 +1072,6 @@ class DynamicGraphSpec extends FlatSpec with Matchers { model.freeze("fc2") println("output2: \n", model.forward(input)) model.backward(input, gradOutput) - model.updateParameters(1) println("fc2 weight \n", fc2.element.parameters()._1(0)) fc1.element.getParameters()._1.apply1(_ => 1.0f) @@ -1082,7 +1080,6 @@ class DynamicGraphSpec extends FlatSpec with Matchers { model.unFreeze() println("output3: \n", model.forward(input)) model.backward(input, gradOutput) - model.updateParameters(1) println("fc2 weight \n", fc2.element.parameters()._1(0)) fc1.element.getParameters()._1.apply1(_ => 1.0f) @@ -1091,7 +1088,6 @@ class DynamicGraphSpec extends FlatSpec with Matchers { model.zeroGradParameters() println("output4: \n", model.forward(input)) model.backward(input, gradOutput) - model.updateParameters(1) println("fc1 weight \n", fc1.element.parameters()._1(0)) println("fc2 weight \n", fc2.element.parameters()._1(0)) } diff --git a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/GraphSpec.scala b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/GraphSpec.scala index 233005a1061..a5b771a1150 100644 --- a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/GraphSpec.scala +++ b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/GraphSpec.scala @@ -1094,7 +1094,6 @@ class StaticGraphSpec extends FlatSpec with Matchers { model.zeroGradParameters() println("output1: \n", model.forward(input)) model.backward(input, gradOutput) - model.updateParameters(1) println("fc2 weight \n", fc2.element.parameters()._1(0)) @@ -1104,7 +1103,6 @@ class StaticGraphSpec extends FlatSpec with Matchers { model.freeze("fc2") println("output2: \n", model.forward(input)) model.backward(input, gradOutput) - model.updateParameters(1) println("fc2 weight \n", fc2.element.parameters()._1(0)) fc1.element.getParameters()._1.apply1(_ => 1.0f) @@ -1113,7 +1111,6 @@ class StaticGraphSpec extends FlatSpec with Matchers { model.unFreeze() println("output3: \n", model.forward(input)) model.backward(input, gradOutput) - model.updateParameters(1) println("fc2 weight \n", fc2.element.parameters()._1(0)) fc1.element.getParameters()._1.apply1(_ => 1.0f) @@ -1122,7 +1119,6 @@ class StaticGraphSpec extends FlatSpec with Matchers { model.zeroGradParameters() println("output4: \n", model.forward(input)) model.backward(input, gradOutput) - model.updateParameters(1) println("fc1 weight \n", fc1.element.parameters()._1(0)) 
println("fc2 weight \n", fc2.element.parameters()._1(0)) } diff --git a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/LinearSpec.scala b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/LinearSpec.scala index 3b87aa552eb..90d3d8d608a 100644 --- a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/LinearSpec.scala +++ b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/LinearSpec.scala @@ -184,7 +184,8 @@ class LinearSpec extends FlatSpec with Matchers { val grad = mse.backward(output, res) linear.zeroGradParameters() linear.backward(input, grad) - linear.updateParameters(0.5 / log(i + 3)) + val (weight, gradWeight) = linear.getParameters() + weight.add(-0.5 / log(i + 3), gradWeight) } val params = linear.parameters() val weight = params._1(0) @@ -236,7 +237,9 @@ class LinearSpec extends FlatSpec with Matchers { val grad = mse.backward(output, res) linear.zeroGradParameters() linear.backward(input, grad) - linear.updateParameters(0.5 / log(i + 3)) + + val (weight, gradWeight) = linear.getParameters() + weight.add(-0.5 / log(i + 3), gradWeight) } val params = linear.parameters() val weight = params._1(0) @@ -288,7 +291,8 @@ class LinearSpec extends FlatSpec with Matchers { val grad = mse.backward(output, res) linear.zeroGradParameters() linear.backward(input, grad) - linear.updateParameters(0.5 / log(i + 3)) + val (weight, gradWeight) = linear.getParameters() + weight.add(-0.5 / log(i + 3), gradWeight) } val params = linear.parameters() val weight = params._1(0) diff --git a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/SpatialConvolutionSpec.scala b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/SpatialConvolutionSpec.scala index 7b4a3aba759..3adce43aecc 100644 --- a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/SpatialConvolutionSpec.scala +++ b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/SpatialConvolutionSpec.scala @@ -366,8 +366,10 @@ class SpatialConvolutionSpec extends FlatSpec with Matchers { gradInputNHWC.transpose(2, 4).transpose(3, 4) .sub(gradInput).pow(2).sum() should be < 1e-7 - layer.updateParameters(0.01) - layerNHWC.updateParameters(0.01) + val (weight1, grad1) = layer.getParameters() + weight1.add(-0.01, grad1) + val (weight2, grad2) = layerNHWC.getParameters() + weight2.add(-0.01, grad2) val transWeight = layerNHWC.weight.transpose(2, 5).transpose(3, 4).transpose(4, 5) transWeight.sub(layer.weight).pow(2).sum() should be < 1e-7 @@ -2819,13 +2821,14 @@ class SpatialConvolutionSpec extends FlatSpec with Matchers { var gradOutput: Tensor[Double] = null var gradInput: Tensor[Double] = null + val (w, g) = model.getParameters() for (k <- 1 to maxIter) { model.zeroGradParameters() output = model.forward(input(k)).toTensor[Double] err = loss.forward(output, t) gradOutput = loss.backward(output, t) gradInput = model.backward(input(k), gradOutput).toTensor[Double] - model.updateParameters(0.001) + w.add(-0.001, g) } input(maxIter).map(exInput, (v1, v2) => { diff --git a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/SpatialFullConvolutionSpec.scala b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/SpatialFullConvolutionSpec.scala index 7fcf8221d00..0c8d0ca95eb 100644 --- a/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/SpatialFullConvolutionSpec.scala +++ b/spark/dl/src/test/scala/com/intel/analytics/bigdl/nn/SpatialFullConvolutionSpec.scala @@ -210,11 +210,13 @@ class SpatialFullConvolutionSpec extends FlatSpec with Matchers { val output1 = layer.forward(input) layer.backward(input, output1) - layer.updateParameters(0.1) 
+ val (weight, grad) = layer.getParameters() + weight.add(-0.1, grad) val output2 = layer2.forward(input) layer2.backward(input, output2) - layer2.updateParameters(0.1) + val (weight2, grad2) = layer2.getParameters() + weight2.add(-0.1, grad2) val output = layer.forward(input) val expected = layer2.forward(input) diff --git a/spark/dl/src/test/scala/com/intel/analytics/bigdl/torch/SequentialSpec.scala b/spark/dl/src/test/scala/com/intel/analytics/bigdl/torch/SequentialSpec.scala index 0da7534e77f..f37c74aec1d 100644 --- a/spark/dl/src/test/scala/com/intel/analytics/bigdl/torch/SequentialSpec.scala +++ b/spark/dl/src/test/scala/com/intel/analytics/bigdl/torch/SequentialSpec.scala @@ -98,7 +98,8 @@ class SequentialSpec extends TorchSpec { module.zeroGradParameters() gradInput = module.updateGradInput(input, gradOutput).toTensor[Double] module.accGradParameters(input, gradOutput) - module.updateParameters(0.1) + val (weight, grad) = module.getParameters() + weight.add(-0.1, grad) i += 1 }
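All of the test updates above follow the same substitution: where a spec previously called module.updateParameters(lr), it now flattens the parameters once with getParameters() and applies the gradient step directly on the flattened tensors via Tensor.add. A minimal sketch of one training iteration in that style follows; it assumes code living inside the bigdl package (getParameters becomes private[bigdl] in this change), and the layer shape, data and learning rate are illustrative only.

import com.intel.analytics.bigdl.nn.{Linear, MSECriterion}
import com.intel.analytics.bigdl.numeric.NumericFloat
import com.intel.analytics.bigdl.tensor.Tensor

val module = Linear[Float](10, 1)             // illustrative layer
val criterion = MSECriterion[Float]()
val (weight, grad) = module.getParameters()   // flattened weights and gradients

val input = Tensor[Float](4, 10).rand()
val target = Tensor[Float](4, 1).rand()
val lr = 0.1f

module.zeroGradParameters()
val output = module.forward(input)
criterion.forward(output, target)
val gradOutput = criterion.backward(output, target)
module.backward(input, gradOutput)
weight.add(-lr, grad)                         // replaces module.updateParameters(lr)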
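loadWeights and loadModelWeights both delegate to the copyWeights helper shown above, which matches layers by name through getParametersTable() and copies only the weight and bias entries present on both sides. A hedged sketch of the intended use, with hypothetical layer names and sizes:

import com.intel.analytics.bigdl.nn.Linear
import com.intel.analytics.bigdl.numeric.NumericFloat

// Hypothetical pretrained and freshly created layers; matching is done by layer name.
val pretrained = Linear[Float](10, 2).setName("fc1")
val fresh = Linear[Float](10, 2).setName("fc1")

// Copies the "weight" and "bias" tensors for every name present in both parameter tables;
// with matchAll = true (the default) a missing layer name is treated as an error.
fresh.loadModelWeights(pretrained)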
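The new toGraph is the counterpart of the inputs(...) node API: it wires the module's end nodes into a Graph, creating a fresh Input() node when no start nodes are supplied. A minimal sketch, assuming the container implements getEndNodes and using hypothetical layer sizes:

import com.intel.analytics.bigdl.nn.{Linear, ReLU, Sequential}
import com.intel.analytics.bigdl.numeric.NumericFloat
import com.intel.analytics.bigdl.tensor.Tensor

val seq = Sequential[Float]().add(Linear(4, 8)).add(ReLU()).add(Linear(8, 2))
val graph = seq.toGraph()            // no start nodes given, so a new Input() node is created
val out = graph.forward(Tensor[Float](3, 4).rand())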