From c92937af51e9398e513b72c43859dad9b80a3896 Mon Sep 17 00:00:00 2001
From: qiuxin2012
Date: Tue, 28 Mar 2017 15:48:58 +0800
Subject: [PATCH] add some scala doc

---
 .../analytics/bigdl/dataset/Transformer.scala |  3 +++
 .../analytics/bigdl/dataset/image/Types.scala |  7 +++++++
 .../bigdl/dataset/text/Dictionary.scala       | 23 +++++++++++++-------
 .../com/intel/analytics/bigdl/nn/Concat.scala | 18 ++++++++++++----
 .../analytics/bigdl/nn/ConcatTable.scala      |  7 +++++++
 .../intel/analytics/bigdl/nn/Dropout.scala    | 19 +++++++++++++++++
 .../com/intel/analytics/bigdl/nn/Exp.scala    |  5 +++++
 .../intel/analytics/bigdl/nn/HardTanh.scala   | 10 ++++++++++
 .../intel/analytics/bigdl/nn/Identity.scala   |  4 ++++
 .../bigdl/parameters/AllReduceParameter.scala |  2 +-
 .../parameters/FP16CompressedTensor.scala     |  5 ++++-
 .../bigdl/parameters/Parameter.scala          |  2 +-
 .../analytics/bigdl/utils/TorchFile.scala     |  1 -
 13 files changed, 91 insertions(+), 15 deletions(-)

diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dataset/Transformer.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dataset/Transformer.scala
index c4e5a1f977a..28564a9009d 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dataset/Transformer.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dataset/Transformer.scala
@@ -64,6 +64,9 @@ object Identity {
   def apply[A](): Identity[A] = new Identity[A]()
 }
 
+/**
+ * Just transforms the input to output, without any change.
+ */
 class Identity[A] extends Transformer[A, A] {
   override def apply(prev: Iterator[A]): Iterator[A] = {
     prev
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dataset/image/Types.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dataset/image/Types.scala
index c3cdb96bf71..df8d44ba2d7 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dataset/image/Types.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dataset/image/Types.scala
@@ -242,6 +242,13 @@ class BGRImage(
 
 }
 
+/**
+ * A BGR image with a float label.
+ * @param d data
+ * @param w width
+ * @param h height
+ * @param _label a float label
+ */
 class LabeledBGRImage(d: Array[Float], w: Int, h: Int,
   protected var _label : Float) extends BGRImage(d, w, h) with Label[Float] {
 
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dataset/text/Dictionary.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dataset/text/Dictionary.scala
index 14569fdc4bc..1958faf769f 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dataset/text/Dictionary.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dataset/text/Dictionary.scala
@@ -29,7 +29,6 @@ import scala.util.Random
  * either from tokenized text or from saved dictionary
  *
  */
-
 class Dictionary()
   extends Serializable {
 
@@ -64,7 +63,7 @@ class Dictionary()
   /**
    * return the encoding number of a word,
-   * if word does not existed in the dictionary,
-   * it will return the dictionarylength as the default index.
+   * if the word does not exist in the dictionary,
+   * it will return the dictionary length as the default index.
    * @param word
    */
   def getIndex(word: String): Int = {
@@ -81,7 +80,7 @@
   /**
    * return the word with regard to the index,
-   * if index is out of bounary, it will randomly
-   * return a word in the discarded word list.
-   * If discard word list is Empty, it will randomly
-   * return a word in the existed dictionary.
+   * if the index is out of boundary, it will randomly
+   * return a word from the discarded word list.
+   * If the discard word list is empty, it will randomly
+   * return a word from the existing dictionary.
    */
@@ -93,11 +92,17 @@ class Dictionary()
       else getWord(Random.nextInt(_vocabSize)))
   }
 
+  /**
+   * Print the word-to-index dictionary.
+   */
   def print(): Unit = {
     _word2index.foreach(x =>
       logger.info(x._1 + " -> " + x._2))
   }
 
+  /**
+   * Print the discarded vocabulary.
+   */
   def printDiscard(): Unit = {
     _discardVocab.foreach(x =>
       logger.info(x))
@@ -135,6 +140,7 @@ class Dictionary()
     update(freqDict.toSeq, vocabSize)
   }
 
+
   def this(sentences: Iterator[Array[String]], vocabSize: Int) = {
     this()
 
@@ -207,10 +213,13 @@ class Dictionary()
 object Dictionary {
   def apply[S <: Iterator[Array[String]]](sentences: S, vocabSize: Int)
   : Dictionary = new Dictionary(sentences, vocabSize)
+
   def apply(dataset: Stream[Array[String]], vocabSize: Int)
   : Dictionary = new Dictionary(dataset, vocabSize)
+
   def apply(directory: String)
   : Dictionary = new Dictionary(directory)
+
   def apply(dataset: RDD[Array[String]], vocabSize: Int = 10000)
   : Dictionary = new Dictionary(dataset, vocabSize)
 }
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Concat.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Concat.scala
index f956fbbadee..947fe53efd0 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Concat.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Concat.scala
@@ -24,6 +24,20 @@ import com.intel.analytics.bigdl.utils.Engine
 import scala.concurrent.Future
 import scala.reflect.ClassTag
 
+/**
+ * Concat concatenates the output of one layer of "parallel"
+ * modules along the provided {@code dimension}: they take the
+ * same inputs, and their output is concatenated.
+ *                 +-----------+
+ *            +---->  module1  -----+
+ *            |    |           |    |
+ * input -----+---->  module2  -----+----> output
+ *            |    |           |    |
+ *            +---->  module3  -----+
+ *                 +-----------+
+ *
+ * @param dimension the dimension along which the outputs are concatenated
+ */
 @SerialVersionUID(- 5218461876031660707L)
 class Concat[T: ClassTag](val dimension: Int)(
   implicit ev: TensorNumeric[T]) extends Container[Tensor[T], Tensor[T], T] {
@@ -35,10 +49,6 @@ class Concat[T: ClassTag](val dimension: Int)(
 
   protected var forwardTimeOverhead = 0L
 
-  def getSize(): Array[Int] = {
-    return size
-  }
-
   override def updateOutput(input: Tensor[T]): Tensor[T] = {
     val outs = new Array[Tensor[T]](this.modules.length)
     var i = 0
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/ConcatTable.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/ConcatTable.scala
index 1b9fe5cddc6..685f56fccb1 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/ConcatTable.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/ConcatTable.scala
@@ -23,6 +23,13 @@ import com.intel.analytics.bigdl.utils.{T, Table}
 
 import scala.reflect.ClassTag
 
+/**
+ * ConcatTable is a container module like Concat. It applies each member
+ * module to the same input, which can be a tensor or a table.
+ *
+ * ConcatTable usually works with CAddTable and CMulTable to
+ * implement element-wise add/multiply on the outputs of two modules.
+ */
 @SerialVersionUID(- 704681653938468956L)
 class ConcatTable[T : ClassTag]
   (implicit ev: TensorNumeric[T]) extends Container[Activity, Table, T] {
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Dropout.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Dropout.scala
index 5cc2bb989fe..f1a98217549 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Dropout.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Dropout.scala
@@ -24,6 +24,16 @@ import com.intel.analytics.bigdl.utils.RandomGenerator._
 import scala.concurrent.Future
 import scala.reflect.ClassTag
 
+/**
+ * Dropout masks (sets to zero) parts of the input using a Bernoulli distribution.
+ * Each input element has a probability initP of being dropped. If scale is
+ * set, the outputs are scaled by a factor of 1/(1-initP) during training.
+ * During evaluation, the output is the same as the input.
+ *
+ * @param initP the probability of an element to be dropped
+ * @param inplace whether to work inplace
+ * @param scale whether to scale the output by a factor of 1/(1-initP)
+ */
 @SerialVersionUID(- 4636332259181125718L)
 class Dropout[T: ClassTag](
   val initP: Double = 0.5, val inplace: Boolean = false, var scale: Boolean = true)(
@@ -34,6 +44,10 @@ class Dropout[T: ClassTag](
   @transient
   protected var results: Array[Future[Unit]] = null
 
+  /**
+   * Get the current probability of an element to be dropped.
+   * @return the probability p
+   */
   def getP(): T = {
     return ev.fromType[Double](p)
   }
@@ -161,6 +175,11 @@ class Dropout[T: ClassTag](
     this
   }
 
+  /**
+   * Set the probability of an element to be dropped.
+   * @param p the new probability
+   * @return this
+   */
   def setP(p: Double): this.type = {
     this.p = p
     this
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Exp.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Exp.scala
index 85999ebaa5e..b44511625e1 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Exp.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Exp.scala
@@ -21,12 +21,17 @@ import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
 
 import scala.reflect.ClassTag
 
+/**
+ * Applies element-wise exp to the input tensor.
+ */
 @SerialVersionUID(4918769744611296463L)
 class Exp[@specialized(Float, Double) T: ClassTag] (implicit ev: TensorNumeric[T])
   extends TensorModule[T] {
+
   override def updateOutput(input: Tensor[T]): Tensor[T] = {
     output.exp(input)
   }
+
   override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
     gradInput
       .resizeAs(gradOutput)
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/HardTanh.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/HardTanh.scala
index ec87ff37826..f26cda93195 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/HardTanh.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/HardTanh.scala
@@ -23,6 +23,16 @@ import com.intel.analytics.bigdl.utils.Engine
 import scala.concurrent.Future
 import scala.reflect.ClassTag
 
+/**
+ * Applies HardTanh to each element of the input tensor. HardTanh is defined as:
+ *           ⎧ maxValue, if x > maxValue
+ *    f(x) = ⎨ minValue, if x < minValue
+ *           ⎩ x,        otherwise
+ *
+ * @param minValue minValue in f(x), default is -1.
+ * @param maxValue maxValue in f(x), default is 1.
+ * @param inplace whether to work inplace.
+ */
 @SerialVersionUID(- 8953866090802444183L)
 class HardTanh[T: ClassTag](
   val minValue: Double = -1,
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Identity.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Identity.scala
index d1587b08390..c1b9f460745 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Identity.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Identity.scala
@@ -20,6 +20,10 @@ import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
 
 import scala.reflect.ClassTag
 
+/**
+ * Identity just returns the input as output.
+ * It's useful in some parallel containers to get the original input.
+ */
 @SerialVersionUID(- 8429221694319933625L)
 class Identity[T: ClassTag]()
   (implicit ev: TensorNumeric[T]) extends AbstractModule[Activity, Activity, T] {
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/parameters/AllReduceParameter.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/parameters/AllReduceParameter.scala
index 8bbde8bafe3..7c1e529a27f 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/parameters/AllReduceParameter.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/parameters/AllReduceParameter.scala
@@ -230,7 +230,7 @@ class AllReduceParameter[T: ClassTag](id: Long, partitionNum: Int,
   }
 }
 
-class FutureResult[T](private val futures: Seq[Future[T]]) {
+private[bigdl] class FutureResult[T](private val futures: Seq[Future[T]]) {
   def waitResult(): Seq[T] = {
     futures.map(_.get())
   }
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/parameters/FP16CompressedTensor.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/parameters/FP16CompressedTensor.scala
index b87ac70e548..aa0e3dcd073 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/parameters/FP16CompressedTensor.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/parameters/FP16CompressedTensor.scala
@@ -23,7 +23,10 @@ import com.intel.analytics.bigdl.utils.Engine
 
 import scala.reflect._
 
-class FP16CompressedTensor[T: ClassTag](buffer: Array[Byte], bufferOffset: Int, bufferLength: Int)
+private[bigdl] class FP16CompressedTensor[T: ClassTag](
+  buffer: Array[Byte],
+  bufferOffset: Int,
+  bufferLength: Int)
   extends CompressedTensor[T] {
 
   def this(tensor: Tensor[T]) {
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/parameters/Parameter.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/parameters/Parameter.scala
index bded4039386..32941d1553e 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/parameters/Parameter.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/parameters/Parameter.scala
@@ -22,7 +22,7 @@ import com.intel.analytics.bigdl.tensor.Tensor
 
 import scala.reflect.ClassTag
 
-abstract trait CompressedTensor[T] extends Serializable {
+private[bigdl] trait CompressedTensor[T] extends Serializable {
 
   def deCompress(srcOffset: Int, tensor: Tensor[T], tgtOffset: Int, length: Int): Unit
 
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/utils/TorchFile.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/utils/TorchFile.scala
index 0ae89abd324..0b2cb54d9d5 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/utils/TorchFile.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/utils/TorchFile.scala
@@ -513,7 +513,6 @@ object TorchFile {
     for (i <- 1 to source.modules.length) {
       modules(i) = source.modules(i - 1)
     }
-    table("size") = source.getSize()
     table("dimension") = source.dimension
     table("modules") = modules
     writeObject(table, rawData, path, TYPE_TABLE)
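
Usage sketch (illustrative, not part of the patch): how the modules documented
above fit together. It assumes the companion-object factories of the BigDL nn
package at the time of this patch (Concat(dimension), ConcatTable(), Dropout(initP),
HardTanh(), Identity(), Dictionary(sentences, vocabSize)), plus Linear, Sequential,
CAddTable and Tensor(...).rand() from the same tree; verify the exact signatures
against the sources this patch applies to.

  import com.intel.analytics.bigdl.dataset.text.Dictionary
  import com.intel.analytics.bigdl.nn.{CAddTable, Concat, ConcatTable, Dropout,
    HardTanh, Identity, Linear, Sequential}
  import com.intel.analytics.bigdl.tensor.Tensor

  // Concat: both branches see the same 2x4 input; their outputs are joined
  // along dimension 2, so the result is 2x8.
  val concat = Concat[Float](2)
  concat.add(Identity[Float]())
  concat.add(HardTanh[Float]())
  val joined = concat.forward(Tensor[Float](2, 4).rand())

  // ConcatTable feeds one input to every member module; CAddTable then sums
  // the resulting table element-wise, as the ConcatTable doc describes.
  val branches = ConcatTable[Float]()
  branches.add(Linear[Float](4, 4))
  branches.add(Linear[Float](4, 4))
  val summed = Sequential[Float]().add(branches).add(CAddTable[Float]())

  // Dropout: zeroes elements with probability 0.4 and scales the survivors
  // by 1 / (1 - 0.4) while training; evaluate() turns the masking off.
  val drop = Dropout[Float](0.4)
  drop.evaluate()

  // Dictionary: build a vocabulary from tokenized sentences, then look a word up.
  val dict = Dictionary(Iterator(Array("hello", "world"), Array("hello", "bigdl")), 10)
  val index = dict.getIndex("hello")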