From b6a123cb62af966ad244a4eea155d97fbbfa58cd Mon Sep 17 00:00:00 2001
From: Yiheng Wang
Date: Wed, 7 Feb 2018 13:11:10 +0800
Subject: [PATCH] reorder the method sequence

---
 .../bigdl/nn/abstractnn/AbstractModule.scala | 387 ++++++++++--------
 1 file changed, 209 insertions(+), 178 deletions(-)

diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/abstractnn/AbstractModule.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/abstractnn/AbstractModule.scala
index 0ee6cb3e9632..aff272521d11 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/abstractnn/AbstractModule.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/abstractnn/AbstractModule.scala
@@ -58,12 +58,8 @@ abstract class TensorModule[T: ClassTag]
 abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, T: ClassTag](
   implicit ev: TensorNumeric[T]) extends Serializable with InferShape{
-  private var namePostfix = Integer.toHexString(java.util.UUID.randomUUID().hashCode())
-
-  final private[bigdl] def getNamePostfix : String = namePostfix
+  // ================================= Public APIs =============================================
 
-  final private[bigdl] def setNamePostfix(namePostfix : String) : Unit =
-    this.namePostfix = namePostfix
 
   /**
    * The cached output. So we don't compute it again when need it
    */
   var output: B = Activity.allocate[B, T]()
@@ -75,13 +71,6 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag,
    */
   var gradInput: A = Activity.allocate[A, T]()
 
-  /**
-   * The scale of gradient weight and gradient bias
-   * before gradParameters being accumulated.
-   */
-  protected var scaleW: Double = 1.0
-  protected var scaleB: Double = 1.0
-
   /**
    * Get the scale of gradientWeight
    */
@@ -139,17 +128,10 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag,
     this
   }
 
-  private[nn] final def allocateAs(dest: Activity): Activity = dest match {
-    case tensor: Tensor[T] => Tensor[T]()
-    case table: Table => T()
-    case _ => throw new IllegalArgumentException("Activity only support tensor and table now")
-  }
-
   /**
-   * The name of the module
+   * Whether the user has set a name for the module
+   * @return
    */
-  private var name : String = null
-
   final def hasName: Boolean = name != null
@@ -176,34 +158,25 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag,
     }
   }
 
-  protected final def getPrintName(): String = {
-    val postfix = if (name == null) {
-      namePostfix
-    } else {
-      name
-    }
-    s"${this.getClass.getSimpleName}[${postfix}]"
-
-  }
-
   override def toString(): String = getPrintName
 
-  protected var forwardTime = 0L
-
-  protected var backwardTime = 0L
-
+  /**
+   * Get the forward/backward time cost for the module or its submodules
+   * @return
+   */
   def getTimes(): Array[(AbstractModule[_ <: Activity, _ <: Activity, T], Long, Long)] = {
     Array((this, forwardTime, backwardTime))
   }
 
+  /**
+   * Reset the recorded forward/backward time for the module or its submodules
+   * @return
+   */
   def resetTimes(): Unit = {
     forwardTime = 0
     backwardTime = 0
   }
 
-  private var scaleWCache: Double = scaleW
-  private var scaleBCache: Double = scaleB
-
   /**
    * freeze the module,
    * i.e. their parameters(weight/bias, if exists) are not changed in training process
@@ -341,28 +314,6 @@
     }
   }
 
-  /**
-   * This function returns two tensors. One for the flattened trainable parameters flatParameters
-   * and another for the gradients of the energy wrt to the trainable parameters flatGradParameters.
-   *
-   * Custom modules should not override this function. They should instead override parameters(...)
-   * which is, in turn, called by the present function.
-   *
-   * This function will go over all the weights and gradWeights and make them view into a single
-   * tensor (one for weights and one for gradWeights).
-   *
-   * @return
-   */
-  final private[bigdl] def getParameters(): (Tensor[T], Tensor[T]) = {
-    val (weightParameters, gradParameters) = this.parameters()
-
-    // If some gradParameters are not allocated storage, allocate it
-    require(weightParameters.size == gradParameters.size,
-      "weights and gradient number are not match")
-    weightParameters.zip(gradParameters).foreach{ case(w, g) => g.resizeAs(w)}
-    (Module.flatten[T](weightParameters), Module.flatten[T](gradParameters))
-  }
-
   /**
    * This function returns two arrays. One for the weights and the other the gradients
    * Custom modules should override this function if they have parameters
@@ -394,7 +345,7 @@
     if (extraParam != null && currentExtraParam != null) {
       require(extraParam.length == currentExtraParam.length,
         "state's length doesn't match, excepted:" +
-          s"${currentExtraParam.length}, but got ${extraParam.length}")
+        s"${currentExtraParam.length}, but got ${extraParam.length}")
       var i = 0
       while (i < extraParam.length) {
         currentExtraParam(i).copy(extraParam(i))
         i += 1
       }
@@ -424,11 +375,6 @@
    */
   def getParametersTable(): Table = null
 
-  /**
-   * Module status. It is useful for modules like dropout/batch normalization
-   */
-  protected var train: Boolean = true
-
   /**
    * Set the module to training mode
    * @return
@@ -460,30 +406,29 @@
    */
   def reset(): Unit = {}
 
-
-  protected var line = "\n"
-
+  /**
+   * Set the line separator used when printing the module
+   * @param line
+   * @return
+   */
   final def setLine(line: String): this.type = {
     this.line = line
     this
   }
 
-  private val engineType: EngineType = Engine.getEngineType()
-
   /**
-   * get execution engine type
+   * Clone the module
+   * @return
    */
-  def checkEngineType(): this.type = {
-    if (engineType != Engine.getEngineType()) {
-      throw new Error("Module's EngineType doesn't march global EngineType")
-    }
-    this
-  }
-
   final def cloneModule(): AbstractModule[A, B, T] = {
     SerializationUtils.clone(this)
   }
 
+  /**
+   * Clone the module, deep or shallow copy
+   * @param deepCopy
+   * @return
+   */
   final def clone(deepCopy : Boolean): AbstractModule[A, B, T] = {
     val moduleData = ModuleData[T](this.
       asInstanceOf[AbstractModule[Activity, Activity, T]], Seq[String](), Seq[String]())
@@ -503,64 +448,6 @@
     copy
   }
 
-
-  final private def setWeightAndBias(copy : AbstractModule[A, B, T], deepCopy : Boolean): Unit = {
-    val parameterTable = this.getParametersTable
-    val copiedModuleParamTable = copy.getParametersTable
-    if (parameterTable != null) {
-      require(copiedModuleParamTable != null, "cloned module should have params")
-      parameterTable.foreach {
-        case (name: String, params: Table) =>
-          require(copiedModuleParamTable.get(name) != None, s"cloned module should have for $name")
-          setLayerWeightAndBias(params,
-            copiedModuleParamTable.get(name).get.asInstanceOf[Table], deepCopy)
-      }
-    }
-  }
-
-  final private def setLayerWeightAndBias(params : Table,
-    copyParams : Table, deepCopy : Boolean): Unit = {
-    params.foreach(param => {
-      copyParam(params, copyParams, deepCopy, param._1.toString)
-    })
-  }
-
-  final private def copyParam(params : Table, copyParams : Table,
-    deepCopy : Boolean, paraName : String) : Unit = {
-    if (params.contains(paraName)) {
-      // this is for quantization tensors where the weight might be an array
-      if (params.get(paraName).get
-        .isInstanceOf[Array[Tensor[T]]]) {
-        val copies = copyParams.get(paraName).get
-          .asInstanceOf[Array[Tensor[T]]]
-        val origins = params.get(paraName).get
-          .asInstanceOf[Array[Tensor[T]]]
-        var i = 0
-        while (i < copies.length) {
-          copyTensor(origins(i), copies(i), deepCopy)
-          i += 1
-        }
-      } else {
-        // For normal layers, their params are just tensors
-        copyTensor(params.get(paraName).get.asInstanceOf[Tensor[T]],
-          copyParams.get(paraName).get.asInstanceOf[Tensor[T]], deepCopy)
-      }
-    }
-  }
-
-  final private def copyTensor(t1 : Tensor[T], t2 : Tensor[T], deepCopy : Boolean) = {
-    if (t2.isInstanceOf[QuantizedTensor[_]]) {
-      t2.asInstanceOf[QuantizedTensor[_]].release()
-    }
-    if (deepCopy) {
-      t2.copy(t1)
-    } else {
-      t2.set(t1)
-    }
-  }
-
-  def canEqual(other: Any): Boolean = other.isInstanceOf[AbstractModule[A, B, T]]
-
   override def equals(other: Any): Boolean = other match {
     case that: AbstractModule[A, B, T] =>
       (that canEqual this) &&
@@ -585,7 +472,7 @@
    * @param overWrite if overwrite
    * @return self
    */
-  @deprecated("please use recommended saveModule(path, overWrite)")
+  @deprecated("please use recommended saveModule(path, overWrite)", "0.3.0")
   final def save(path : String, overWrite: Boolean = false) : this.type = {
     this.clearState()
     File.save(this, path, overWrite)
     this
   }
@@ -602,7 +489,7 @@
    * @return self
    */
   final def saveModule(path : String, weightPath : String = null,
-    overWrite: Boolean = false) : this.type = {
+                       overWrite: Boolean = false) : this.type = {
     this.clearState()
     ModulePersister.saveToFile(path, weightPath, this, overWrite)
     this
   }
@@ -658,10 +545,10 @@
    * @return
    */
   final def saveTF(
-    inputs : Seq[(String, Seq[Int])],
-    path: String,
-    byteOrder: ByteOrder = ByteOrder.LITTLE_ENDIAN,
-    dataFormat: TensorflowDataFormat = TensorflowDataFormat.NHWC): this.type = {
+      inputs : Seq[(String, Seq[Int])],
+      path: String,
+      byteOrder: ByteOrder = ByteOrder.LITTLE_ENDIAN,
+      dataFormat: TensorflowDataFormat = TensorflowDataFormat.NHWC): this.type = {
     require(this.isInstanceOf[Graph[T]], "only Graph container can be saved as Tensorflow model")
Tensorflow model") this.clearState() val inTrainMode = train @@ -691,8 +578,8 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, * @param shareBuffer whether to share same memory for each batch predict results */ final def predict(dataset: RDD[Sample[T]], - batchSize: Int = -1, - shareBuffer: Boolean = false): RDD[Activity] = { + batchSize: Int = -1, + shareBuffer: Boolean = false): RDD[Activity] = { Predictor(this).predict(dataset, batchSize, shareBuffer) } @@ -751,9 +638,9 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, val weights = parameters()._1 for(i <- newWeights.indices) { // TODO: enable this checking as we don't respect shape right now. -// require(weights(i).size().deep == newWeights(i).size().deep, -// s"Mismatch shape, ${weights(i).size().mkString(",")}" + -// s" vs ${newWeights(i).size().mkString(",")} ") + // require(weights(i).size().deep == newWeights(i).size().deep, + // s"Mismatch shape, ${weights(i).size().mkString(",")}" + + // s" vs ${newWeights(i).size().mkString(",")} ") weights(i).copy(newWeights(i)) } this @@ -822,25 +709,6 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, this } - final private def copyWeights(target: Table, src: Table, matchAll: Boolean): Unit = { - target.foreach { - case (name: String, targetParams: Table) => - if (src.contains(name)) { - val srcParams = src[Table](name) - if (srcParams.contains("weight")) { - val w = srcParams[Tensor[T]]("weight") - targetParams[Tensor[T]]("weight").resizeAs(w).copy(w) - } - if (srcParams.contains("bias")) { - val b = srcParams[Tensor[T]]("bias") - targetParams[Tensor[T]]("bias").resizeAs(b).copy(b) - } - } else { - if (matchAll) new Exception(s"module $name cannot find corresponding weight bias") - } - } - } - /** * Build graph: some other modules point to current module * @param nodes upstream module nodes @@ -883,6 +751,17 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, curNode } + /** + * Generate graph module with start nodes + * @param startNodes + * @return + */ + def toGraph(startNodes: ModuleNode[T]*): Graph[T] = { + val starts = if (startNodes.isEmpty) Array(Input[T]()) else startNodes.toArray + val endNodes = this.getEndNodes(starts) + Graph(starts, endNodes) + } + /** * Find a module with given name. If there is no module with given name, it will return None. If * there are multiple modules with the given name, an exception will be thrown. @@ -935,6 +814,169 @@ abstract class AbstractModule[A <: Activity: ClassTag, B <: Activity: ClassTag, Quantization.quantize(this) } + // ================================= Internal APIs =========================================== + + private var namePostfix = Integer.toHexString(java.util.UUID.randomUUID().hashCode()) + + final private[bigdl] def getNamePostfix : String = namePostfix + + final private[bigdl] def setNamePostfix(namePostfix : String) : Unit = + this.namePostfix = namePostfix + + /** + * The scale of gradient weight and gradient bias + * before gradParameters being accumulated. 
+ */ + protected var scaleW: Double = 1.0 + protected var scaleB: Double = 1.0 + + private[nn] final def allocateAs(dest: Activity): Activity = dest match { + case tensor: Tensor[T] => Tensor[T]() + case table: Table => T() + case _ => throw new IllegalArgumentException("Activity only support tensor and table now") + } + + /** + * The name of the module + */ + private var name : String = null + + protected final def getPrintName(): String = { + val postfix = if (name == null) { + namePostfix + } else { + name + } + s"${this.getClass.getSimpleName}[${postfix}]" + + } + + protected var forwardTime = 0L + + protected var backwardTime = 0L + + private var scaleWCache: Double = scaleW + private var scaleBCache: Double = scaleB + + /** + * This function returns two tensors. One for the flattened trainable parameters flatParameters + * and another for the gradients of the energy wrt to the trainable parameters flatGradParameters. + * + * Custom modules should not override this function. They should instead override parameters(...) + * which is, in turn, called by the present function. + * + * This function will go over all the weights and gradWeights and make them view into a single + * tensor (one for weights and one for gradWeights). + * + * @return + */ + final private[bigdl] def getParameters(): (Tensor[T], Tensor[T]) = { + val (weightParameters, gradParameters) = this.parameters() + + // If some gradParameters are not allocated storage, allocate it + require(weightParameters.size == gradParameters.size, + "weights and gradient number are not match") + weightParameters.zip(gradParameters).foreach{ case(w, g) => g.resizeAs(w)} + (Module.flatten[T](weightParameters), Module.flatten[T](gradParameters)) + } + + /** + * Module status. It is useful for modules like dropout/batch normalization + */ + protected var train: Boolean = true + + + protected var line = "\n" + + + private val engineType: EngineType = Engine.getEngineType() + + /** + * get execution engine type + */ + private[bigdl] def checkEngineType(): this.type = { + if (engineType != Engine.getEngineType()) { + throw new Error("Module's EngineType doesn't march global EngineType") + } + this + } + + final private def setWeightAndBias(copy : AbstractModule[A, B, T], deepCopy : Boolean): Unit = { + val parameterTable = this.getParametersTable + val copiedModuleParamTable = copy.getParametersTable + if (parameterTable != null) { + require(copiedModuleParamTable != null, "cloned module should have params") + parameterTable.foreach { + case (name: String, params: Table) => + require(copiedModuleParamTable.get(name) != None, s"cloned module should have for $name") + setLayerWeightAndBias(params, + copiedModuleParamTable.get(name).get.asInstanceOf[Table], deepCopy) + } + } + } + + final private def setLayerWeightAndBias(params : Table, + copyParams : Table, deepCopy : Boolean): Unit = { + params.foreach(param => { + copyParam(params, copyParams, deepCopy, param._1.toString) + }) + } + + final private def copyParam(params : Table, copyParams : Table, + deepCopy : Boolean, paraName : String) : Unit = { + if (params.contains(paraName)) { + // this is for quantization tensors where the weight might be an array + if (params.get(paraName).get + .isInstanceOf[Array[Tensor[T]]]) { + val copies = copyParams.get(paraName).get + .asInstanceOf[Array[Tensor[T]]] + val origins = params.get(paraName).get + .asInstanceOf[Array[Tensor[T]]] + var i = 0 + while (i < copies.length) { + copyTensor(origins(i), copies(i), deepCopy) + i += 1 + } + } else { + // For 
+        copyTensor(params.get(paraName).get.asInstanceOf[Tensor[T]],
+          copyParams.get(paraName).get.asInstanceOf[Tensor[T]], deepCopy)
+      }
+    }
+  }
+
+  final private def copyTensor(t1 : Tensor[T], t2 : Tensor[T], deepCopy : Boolean) = {
+    if (t2.isInstanceOf[QuantizedTensor[_]]) {
+      t2.asInstanceOf[QuantizedTensor[_]].release()
+    }
+    if (deepCopy) {
+      t2.copy(t1)
+    } else {
+      t2.set(t1)
+    }
+  }
+
+  final private def copyWeights(target: Table, src: Table, matchAll: Boolean): Unit = {
+    target.foreach {
+      case (name: String, targetParams: Table) =>
+        if (src.contains(name)) {
+          val srcParams = src[Table](name)
+          if (srcParams.contains("weight")) {
+            val w = srcParams[Tensor[T]]("weight")
+            targetParams[Tensor[T]]("weight").resizeAs(w).copy(w)
+          }
          if (srcParams.contains("bias")) {
+            val b = srcParams[Tensor[T]]("bias")
+            targetParams[Tensor[T]]("bias").resizeAs(b).copy(b)
+          }
+        } else {
+          if (matchAll) throw new Exception(s"module $name cannot find corresponding weight bias")
+        }
+    }
+  }
+
+  private[bigdl] def canEqual(other: Any): Boolean = other.isInstanceOf[AbstractModule[A, B, T]]
+
   /**
    * Generate end nodes of current module with start nodes
    * @param startNodes: current start nodes
    * @return current end nodes
    */
   private[bigdl] def getEndNodes(startNodes: Array[ModuleNode[T]]): Array[ModuleNode[T]] = {
     val endNodes = Array(this.inputs(startNodes: _*))
     endNodes
   }
 
-  /**
-   * Generate graph module with start nodes
-   * @param startNodes
-   * @return
-   */
-  def toGraph(startNodes: ModuleNode[T]*): Graph[T] = {
-    val starts = if (startNodes.isEmpty) Array(Input[T]()) else startNodes.toArray
-    val endNodes = this.getEndNodes(starts)
-    Graph(starts, endNodes)
-  }
-
   /**
    * Return classTag numerics for module serialization. If your module contains multiple classtag
    * in the constructor, you should override this method
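The patch itself ends above. As a quick orientation, and not as part of the diff, the sketch below exercises the public-API block that this reorder groups at the top of AbstractModule: setName, forward/backward, getTimes/resetTimes, freeze/unfreeze, cloneModule and toGraph. It assumes the BigDL 0.x Scala API of that era: Linear, Input, Tensor and the NumericFloat import come from the library rather than from this patch, and the object and value names (AbstractModuleApiTour, fc) are made up for illustration.

import com.intel.analytics.bigdl.nn.{Input, Linear}
import com.intel.analytics.bigdl.numeric.NumericFloat
import com.intel.analytics.bigdl.tensor.Tensor

object AbstractModuleApiTour {
  def main(args: Array[String]): Unit = {
    // Linear is a concrete AbstractModule; setName/hasName/getName belong to the public API block.
    val fc = Linear[Float](10, 5).setName("fc1")

    // forward/backward fill `output`/`gradInput` and the timers reported by getTimes().
    val input = Tensor[Float](2, 10).rand()
    val out = fc.forward(input)
    fc.backward(input, Tensor[Float](2, 5).rand())
    fc.getTimes().foreach { case (m, fwd, bwd) =>
      println(s"${m.getName()}: forward ${fwd} ns, backward ${bwd} ns")
    }
    fc.resetTimes()

    // freeze()/unfreeze() toggle the gradient scales; cloneModule() deep-copies via serialization.
    fc.freeze()
    fc.unfreeze()
    val copy = fc.cloneModule()

    // toGraph() wraps the layer into a Graph container starting from an Input node.
    val graph = fc.toGraph(Input[Float]())
    println(copy.getName(), out.size().mkString("x"), graph)
  }
}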