Commit c92937a: add some scala doc
qiuxin2012 committed Mar 29, 2017 (1 parent: bb6c19f)

Showing 13 changed files with 87 additions and 11 deletions.
@@ -64,6 +64,9 @@ object Identity {
def apply[A](): Identity[A] = new Identity[A]()
}

/**
* Just transforms the input to the output without any change.
*/
class Identity[A] extends Transformer[A, A] {
override def apply(prev: Iterator[A]): Iterator[A] = {
prev
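
A minimal usage sketch of this transformer (the apply comes from the companion object shown in this same file):

val pass = Identity[Int]()
val out = pass(Iterator(1, 2, 3)) // yields the same elements: 1, 2, 3
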
@@ -242,6 +242,13 @@ class BGRImage(
}


/**
* A BGR Image with label.
* @param d data
* @param w width
* @param h height
* @param _label a float label
*/
class LabeledBGRImage(d: Array[Float], w: Int, h: Int,
protected var _label : Float) extends BGRImage(d, w, h) with Label[Float] {

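
For illustration, a hedged construction sketch (assuming the data array holds width * height * 3 floats, one per B, G, R channel):

// a 2x2 BGR image with all-zero pixels and label 1.0f
val img = new LabeledBGRImage(new Array[Float](2 * 2 * 3), 2, 2, 1.0f)
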
@@ -29,7 +29,6 @@ import scala.util.Random
* either from tokenized text or from saved dictionary
*
*/

class Dictionary()
extends Serializable {

@@ -64,7 +63,7 @@ class Dictionary()
/**
* return the encoding number of a word;
* if the word does not exist in the dictionary,
* it will return the dictionary length as the default index.
* @param word the word to look up
*/
def getIndex(word: String): Int = {
@@ -81,7 +80,7 @@

/**
* return the word corresponding to the index;
* if the index is out of bounds, it will randomly
* return a word from the discarded word list.
* If the discarded word list is empty, it will randomly
* return a word from the existing dictionary.
@@ -93,11 +92,17 @@
else getWord(Random.nextInt(_vocabSize)))
}

/**
* print the word-to-index dictionary
*/
def print(): Unit = {
_word2index.foreach(x =>
logger.info(x._1 + " -> " + x._2))
}

/**
* print the discarded vocabulary
*/
def printDiscard(): Unit = {
_discardVocab.foreach(x =>
logger.info(x))
@@ -135,6 +140,7 @@ class Dictionary()

update(freqDict.toSeq, vocabSize)
}

def this(sentences: Iterator[Array[String]],
vocabSize: Int) = {
this()
@@ -207,10 +213,13 @@ class Dictionary()
object Dictionary {
def apply[S <: Iterator[Array[String]]](sentences: S, vocabSize: Int)
: Dictionary = new Dictionary(sentences, vocabSize)

def apply(dataset: Stream[Array[String]], vocabSize: Int)
: Dictionary = new Dictionary(dataset, vocabSize)

def apply(directory: String)
: Dictionary = new Dictionary(directory)

def apply(dataset: RDD[Array[String]], vocabSize: Int = 10000)
: Dictionary = new Dictionary(dataset, vocabSize)
}
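
A minimal usage sketch of the Dictionary API above (the package path and a getWord(index: Int) accessor are assumed from the surrounding code):

import com.intel.analytics.bigdl.dataset.text.Dictionary // path assumed

val sentences = Iterator(
  Array("deep", "learning", "on", "spark"),
  Array("deep", "learning", "is", "fun"))
val dict = Dictionary(sentences, vocabSize = 10)
val known = dict.getIndex("deep")    // encoding number of "deep"
val unknown = dict.getIndex("scala") // unseen word -> dictionary length
val word = dict.getWord(known)       // maps the index back to a word
dict.print()                         // logs each word -> index pair
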
18 changes: 14 additions & 4 deletions spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Concat.scala
@@ -24,6 +24,20 @@ import com.intel.analytics.bigdl.utils.Engine
import scala.concurrent.Future
import scala.reflect.ClassTag

/**
* Concat concatenates the output of one layer of "parallel"
* modules along the provided {@code dimension}: they take the
* same inputs, and their output is concatenated.
* +-----------+
* +----> module1 -----+
* | | | |
* input -----+----> module2 -----+----> output
* | | | |
* +----> module3 -----+
* +-----------+
*
* @param dimension the dimension along which the outputs are concatenated
*/
@SerialVersionUID(- 5218461876031660707L)
class Concat[T: ClassTag](val dimension: Int)(
implicit ev: TensorNumeric[T]) extends Container[Tensor[T], Tensor[T], T] {
@@ -35,10 +49,6 @@ class Concat[T: ClassTag](val dimension: Int)(

protected var forwardTimeOverhead = 0L

def getSize(): Array[Int] = {
return size
}

override def updateOutput(input: Tensor[T]): Tensor[T] = {
val outs = new Array[Tensor[T]](this.modules.length)
var i = 0
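
A minimal sketch matching the diagram above (Linear and the Tensor factory methods are assumed BigDL APIs, not part of this diff):

import com.intel.analytics.bigdl.nn.{Concat, Linear}
import com.intel.analytics.bigdl.tensor.Tensor

val module = new Concat[Float](2) // concatenate along dimension 2
module.add(Linear[Float](10, 3))
module.add(Linear[Float](10, 5))
// a 4x10 input batch yields a 4x(3+5) = 4x8 output
val output = module.forward(Tensor[Float](4, 10).rand())
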
@@ -23,6 +23,13 @@ import com.intel.analytics.bigdl.utils.{T, Table}

import scala.reflect.ClassTag

/**
* ConcatTable is a container module like Concat. It applies the
* input to each member module; the input can be a tensor or a table.
*
* ConcatTable usually works with CAddTable and CMulTable to
* implement element-wise add/multiply on the outputs of two modules.
*/
@SerialVersionUID(- 704681653938468956L)
class ConcatTable[T : ClassTag]
(implicit ev: TensorNumeric[T]) extends Container[Activity, Table, T] {
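
A minimal sketch of the element-wise add pattern mentioned in the doc (Sequential, Linear and CAddTable are assumed BigDL modules, not part of this diff):

import com.intel.analytics.bigdl.nn.{CAddTable, ConcatTable, Linear, Sequential}

val branches = new ConcatTable[Float]()
branches.add(Linear[Float](10, 5))
branches.add(Linear[Float](10, 5))

val model = Sequential[Float]()
model.add(branches)           // produces a table of two 5-dim outputs
model.add(CAddTable[Float]()) // sums the two outputs element-wise
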
19 changes: 19 additions & 0 deletions spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/Dropout.scala
@@ -24,6 +24,16 @@ import com.intel.analytics.bigdl.utils.RandomGenerator._
import scala.concurrent.Future
import scala.reflect.ClassTag

/**
* Dropout masks (sets to zero) parts of the input using samples from
* a Bernoulli distribution. Each input element has a probability initP
* of being dropped. If scale is set, the outputs are scaled by a factor
* of 1/(1-initP) during training. During evaluation, the output is the
* same as the input.
*
* @param initP the probability that an input element is dropped
* @param inplace whether to perform the operation in place
* @param scale whether to scale the outputs by 1/(1-initP)
*/
@SerialVersionUID(- 4636332259181125718L)
class Dropout[T: ClassTag](
val initP: Double = 0.5, val inplace: Boolean = false, var scale: Boolean = true)(
@@ -34,6 +44,10 @@ class Dropout[T: ClassTag](
@transient
protected var results: Array[Future[Unit]] = null

/**
* Get the current probability of an input element being dropped.
* @return the drop probability p
*/
def getP(): T = {
return ev.fromType[Double](p)
}
@@ -161,6 +175,11 @@ class Dropout[T: ClassTag](
this
}

/**
* Set the probability of an input element being dropped.
* @param p the new drop probability
* @return this
*/
def setP(p: Double): this.type = {
this.p = p
this
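
A minimal sketch of the documented behavior (the Tensor factory and the training/evaluate mode switches are assumed from BigDL's core API):

import com.intel.analytics.bigdl.nn.Dropout
import com.intel.analytics.bigdl.tensor.Tensor

val drop = new Dropout[Float](initP = 0.4)
drop.training()
// roughly 40% of entries become 0; survivors are scaled by 1/(1-0.4)
val out = drop.forward(Tensor[Float](2, 4).fill(1.0f))
drop.evaluate() // in evaluate mode, forward returns the input unchanged
drop.setP(0.25) // lower the drop probability afterwards
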
@@ -21,12 +21,17 @@ import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.reflect.ClassTag

/**
* Applies element-wise exp to the input tensor.
*/
@SerialVersionUID(4918769744611296463L)
class Exp[@specialized(Float, Double) T: ClassTag] (implicit ev: TensorNumeric[T])
extends TensorModule[T] {

override def updateOutput(input: Tensor[T]): Tensor[T] = {
output.exp(input)
}

override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
gradInput
.resizeAs(gradOutput)
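
A one-line usage sketch; since d/dx exp(x) = exp(x), updateGradInput above can reuse the cached output (Tensor factory assumed):

import com.intel.analytics.bigdl.nn.Exp
import com.intel.analytics.bigdl.tensor.Tensor

val exp = new Exp[Float]()
val y = exp.forward(Tensor[Float](2, 2).fill(1.0f)) // every entry becomes e, about 2.7183
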
@@ -23,6 +23,16 @@ import com.intel.analytics.bigdl.utils.Engine
import scala.concurrent.Future
import scala.reflect.ClassTag

/**
* Applies HardTanh to each element of the input. HardTanh is defined as:
* ⎧ maxValue, if x > maxValue
* f(x) = ⎨ minValue, if x < minValue
* ⎩ x, otherwise
*
* @param minValue minValue in f(x), default is -1.
* @param maxValue maxValue in f(x), default is 1.
* @param inplace whether to perform the operation in place.
*/
@SerialVersionUID(- 8953866090802444183L)
class HardTanh[T: ClassTag](
val minValue: Double = -1,
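
A minimal sketch of the clamping behavior (the Tensor-from-Storage constructor is assumed from BigDL's tensor package):

import com.intel.analytics.bigdl.nn.HardTanh
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}

val ht = new HardTanh[Float]() // default range [-1, 1]
val x = Tensor[Float](Storage(Array(-2.0f, -0.5f, 0.5f, 2.0f)))
val y = ht.forward(x) // -1.0, -0.5, 0.5, 1.0
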
@@ -20,6 +20,10 @@ import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.reflect.ClassTag

/**
* Identity just returns the input as the output.
* It is useful in some parallel containers to obtain the original input.
*/
@SerialVersionUID(- 8429221694319933625L)
class Identity[T: ClassTag]()
(implicit ev: TensorNumeric[T]) extends AbstractModule[Activity, Activity, T] {
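
A minimal sketch of the pass-through use case the doc describes (ConcatTable and Linear are assumed from the same nn package):

import com.intel.analytics.bigdl.nn.{ConcatTable, Identity, Linear}

val branches = new ConcatTable[Float]()
branches.add(Linear[Float](10, 10)) // transformed branch
branches.add(new Identity[Float]()) // pass-through branch keeps the original input
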
@@ -230,7 +230,7 @@ class AllReduceParameter[T: ClassTag](id: Long, partitionNum: Int,
}
}

private[bigdl] class FutureResult[T](private val futures: Seq[Future[T]]) {
def waitResult(): Seq[T] = {
futures.map(_.get())
}
@@ -23,7 +23,10 @@ import com.intel.analytics.bigdl.utils.Engine

import scala.reflect._

private[bigdl] class FP16CompressedTensor[T: ClassTag](
buffer: Array[Byte],
bufferOffset: Int,
bufferLength: Int)
extends CompressedTensor[T] {

def this(tensor: Tensor[T]) {
@@ -22,7 +22,7 @@ import com.intel.analytics.bigdl.tensor.Tensor

import scala.reflect.ClassTag

private[bigdl] trait CompressedTensor[T] extends Serializable {

def deCompress(srcOffset: Int, tensor: Tensor[T], tgtOffset: Int, length: Int): Unit

Expand Up @@ -513,7 +513,6 @@ object TorchFile {
for (i <- 1 to source.modules.length) {
modules(i) = source.modules(i - 1)
}
table("size") = source.getSize()
table("dimension") = source.dimension
table("modules") = modules
writeObject(table, rawData, path, TYPE_TABLE)
