[Scala] Add EvalMetrics TopK and F1, and optimizers NAG, SGLD, ccSGD (#3149)
* scalapkg: add TopK and F1 EvalMetric
* scalapkg: add optimizers NAG, SGLD, ccSGD
Showing 8 changed files with 430 additions and 1 deletion.
91 changes: 91 additions & 0 deletions
scala-package/core/src/main/scala/ml/dmlc/mxnet/optimizer/NAG.scala
@@ -0,0 +1,91 @@
package ml.dmlc.mxnet.optimizer

import ml.dmlc.mxnet.{Optimizer, LRScheduler, NDArray}
import ml.dmlc.mxnet.NDArrayConversions._

/**
 * SGD with Nesterov momentum.
 * Implemented according to
 * https://github.com/torch/optim/blob/master/sgd.lua
 *
 * @author Depeng Liang
 *
 * @param learningRate Float, step size.
 * @param momentum Float, momentum value.
 * @param wd Float, L2 regularization coefficient added to all the weights.
 * @param clipGradient Float, clip gradient in range [-clipGradient, clipGradient].
 * @param lrScheduler The learning rate scheduler.
 */
class NAG(val learningRate: Float = 0.01f, val momentum: Float = 0.0f,
          val wd: Float = 0.0001f, val clipGradient: Float = 0f,
          val lrScheduler: LRScheduler = null) extends Optimizer {

  if (lrScheduler != null) {
    lrScheduler.baseLR = learningRate
  }

  /**
   * Update the parameters.
   * @param index A unique integer key used to index the parameters.
   * @param weight weight NDArray.
   * @param grad gradient NDArray.
   * @param state NDArray or other object returned by initState,
   *              the auxiliary state used in optimization.
   */
  override def update(index: Int, weight: NDArray, grad: NDArray, state: AnyRef): Unit = {
    // TODO(bing) implement wd_bias, wd_gamma, wd_beta (copy from python package)
    val lr =
      (if (lrScheduler != null) {
        val scheduledLr = lrScheduler(numUpdate)
        updateCount(index)
        scheduledLr
      } else {
        this.learningRate
      }) * lrScale.getOrElse(index, 1f)

    val wd = getWd(index, this.wd)
    var resdGrad = grad * this.rescaleGrad
    if (clipGradient != 0f) {
      // dispose the intermediate NDArray to avoid a memory leak
      val oldResdGrad = resdGrad
      resdGrad = NDArray.clip(resdGrad, -clipGradient, clipGradient)
      oldResdGrad.dispose()
    }

    if (state != null) {
      val mom = state.asInstanceOf[NDArray]
      mom *= momentum
      resdGrad += wd * weight
      mom += resdGrad
      resdGrad += momentum * mom
      weight += -lr * resdGrad
    } else {
      require(momentum == 0f)
      // adder = -lr * (resdGrad + this.wd * weight)
      // written this way to avoid a memory leak
      val adder = this.wd * weight
      adder += resdGrad
      adder *= (-lr)
      weight += adder
      adder.dispose()
    }

    resdGrad.dispose()
  }

  // Create additional optimizer state such as momentum.
  override def createState(index: Int, weight: NDArray): AnyRef = {
    if (momentum == 0.0f) {
      null
    } else {
      NDArray.zeros(weight.shape, weight.context)
    }
  }

  // Dispose the state it created
  override def disposeState(state: AnyRef): Unit = {
    if (state != null) {
      state.asInstanceOf[NDArray].dispose()
    }
  }
}
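A usage sketch for the updater above, not taken from the diff: the shapes and hyperparameters are hypothetical, and it assumes the Scala package's usual NDArray/Shape factories.

import ml.dmlc.mxnet._
import ml.dmlc.mxnet.optimizer.NAG

// Drive a single manual update step on a hypothetical 2x2 parameter.
val opt = new NAG(learningRate = 0.1f, momentum = 0.9f, wd = 0.0001f)
val weight = NDArray.ones(Shape(2, 2))
val grad = NDArray.ones(Shape(2, 2)) * 0.5f
val state = opt.createState(0, weight)  // momentum buffer (null when momentum == 0)
opt.update(0, weight, grad, state)
opt.disposeState(state)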
70 changes: 70 additions & 0 deletions
scala-package/core/src/main/scala/ml/dmlc/mxnet/optimizer/SGLD.scala
@@ -0,0 +1,70 @@
package ml.dmlc.mxnet.optimizer

import ml.dmlc.mxnet.{Optimizer, LRScheduler, NDArray}
import ml.dmlc.mxnet.NDArrayConversions._
import ml.dmlc.mxnet.Random

/**
 * Stochastic Gradient Langevin Dynamics (SGLD) updater to sample from a distribution.
 *
 * @author Depeng Liang
 *
 * @param learningRate Float, step size.
 * @param rescaleGradient Float, rescaling factor of gradient.
 * @param wd Float, L2 regularization coefficient added to all the weights.
 * @param clipGradient Float, clip gradient in range [-clipGradient, clipGradient].
 * @param lrScheduler The learning rate scheduler.
 */
class SGLD(val learningRate: Float = 0.01f, val rescaleGradient: Float = 1.0f,
           val wd: Float = 0.0001f, val clipGradient: Float = 0f,
           val lrScheduler: LRScheduler = null) extends Optimizer {

  if (lrScheduler != null) {
    lrScheduler.baseLR = learningRate
  }

  /**
   * Update the parameters.
   * @param index A unique integer key used to index the parameters.
   * @param weight weight NDArray.
   * @param grad gradient NDArray.
   * @param state NDArray or other object returned by initState,
   *              the auxiliary state used in optimization.
   */
  override def update(index: Int, weight: NDArray, grad: NDArray, state: AnyRef): Unit = {
    val lr =
      (if (lrScheduler != null) {
        val scheduledLr = lrScheduler(numUpdate)
        updateCount(index)
        scheduledLr
      } else {
        this.learningRate
      }) * lrScale.getOrElse(index, 1f)

    val wd = getWd(index, this.wd)
    var resdGrad = grad * this.rescaleGrad
    if (clipGradient != 0f) {
      // dispose the intermediate NDArray to avoid a memory leak
      val oldResdGrad = resdGrad
      resdGrad = NDArray.clip(resdGrad, -clipGradient, clipGradient)
      oldResdGrad.dispose()
    }

    val adder = this.wd * weight
    adder += resdGrad
    adder *= -(lr / 2)
    val norm = Random.normal(0f, Math.sqrt(lr).toFloat, weight.shape, weight.context)
    adder += norm
    weight += adder
    adder.dispose()
    norm.dispose()
  }

  // Create additional optimizer state such as momentum.
  override def createState(index: Int, weight: NDArray): AnyRef = {
    null
  }

  // Dispose the state it created
  override def disposeState(state: AnyRef): Unit = {}
}
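The update above amounts to weight <- weight - (lr/2) * (rescaled grad + wd * weight) + noise, where the noise is Gaussian with variance lr. A hypothetical driver loop (illustrative shapes and values, not from the diff); repeated updates yield approximate posterior samples of the weights:

import ml.dmlc.mxnet._
import ml.dmlc.mxnet.optimizer.SGLD

val opt = new SGLD(learningRate = 0.001f, wd = 0.00001f)
val weight = NDArray.ones(Shape(4))         // hypothetical parameter vector
for (t <- 0 until 10) {
  val grad = NDArray.ones(Shape(4)) * 0.1f  // hypothetical gradient at step t
  opt.update(0, weight, grad, null)         // SGLD keeps no per-weight state
  grad.dispose()
}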
76 changes: 76 additions & 0 deletions
scala-package/core/src/main/scala/ml/dmlc/mxnet/optimizer/ccSGD.scala
@@ -0,0 +1,76 @@
package ml.dmlc.mxnet.optimizer

import ml.dmlc.mxnet.{Optimizer, LRScheduler, NDArray}
import ml.dmlc.mxnet.NDArrayConversions._
import ml.dmlc.mxnet.Base._

/**
 * A very simple SGD optimizer with momentum and weight regularization.
 * Implemented in C++.
 *
 * @author Depeng Liang
 *
 * @param learningRate Float, step size.
 * @param momentum Float, momentum value.
 * @param rescaleGradient Float, rescaling factor of gradient.
 * @param wd Float, L2 regularization coefficient added to all the weights.
 * @param clipGradient Float, clip gradient in range [-clipGradient, clipGradient].
 * @param lrScheduler The learning rate scheduler.
 */
class ccSGD(val learningRate: Float = 0.01f, val momentum: Float = 0.0f,
            val wd: Float = 0.0001f, val rescaleGradient: Float = 1.0f,
            val clipGradient: Float = -1f, val lrScheduler: LRScheduler = null
           ) extends Optimizer {

  if (lrScheduler != null) {
    lrScheduler.baseLR = learningRate
  }

  private val optCreator = new OptimizerCreatorRef
  private val optHandle = new OptimizerHandleRef

  checkCall(_LIB.mxOptimizerFindCreator("ccsgd", optCreator))
  private val paramKeys = Array("momentum", "rescale_grad", "clip_gradient")
  private val paramvals = Array(s"$momentum", s"$rescaleGradient", s"$clipGradient")
  checkCall(_LIB.mxOptimizerCreateOptimizer(
    optCreator.value, paramKeys.length, paramKeys, paramvals, optHandle))

  /**
   * Update the parameters.
   * @param index A unique integer key used to index the parameters.
   * @param weight weight NDArray.
   * @param grad gradient NDArray.
   * @param state NDArray or other object returned by initState,
   *              the auxiliary state used in optimization.
   */
  override def update(index: Int, weight: NDArray, grad: NDArray, state: AnyRef): Unit = {
    val lr =
      (if (lrScheduler != null) {
        val scheduledLr = lrScheduler(numUpdate)
        updateCount(index)
        scheduledLr
      } else {
        this.learningRate
      }) * lrScale.getOrElse(index, 1f)

    val wd = getWd(index, this.wd)
    checkCall(_LIB.mxOptimizerUpdate(optHandle.value, index, weight.handle, grad.handle, lr, wd))
  }

  // Create additional optimizer state such as momentum.
  override def createState(index: Int, weight: NDArray): AnyRef = {
    null
  }

  // Dispose the state it created
  override def disposeState(state: AnyRef): Unit = {}

  /**
   * Free the optimizer handle.
   * The object shall never be used after it is disposed.
   */
  def dispose(): Unit = {
    checkCall(_LIB.mxOptimizerFree(optHandle.value))
  }
}
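A construction and teardown sketch with hypothetical values (not from the diff); the arithmetic itself runs in native code through the "ccsgd" handle created above, so the snippet assumes the native MXNet library is loaded as usual by the Scala package.

import ml.dmlc.mxnet._
import ml.dmlc.mxnet.optimizer.ccSGD

val opt = new ccSGD(learningRate = 0.1f, momentum = 0.9f, clipGradient = 10f)
val weight = NDArray.ones(Shape(3))       // hypothetical parameter
val grad = NDArray.ones(Shape(3)) * 0.2f  // hypothetical gradient
opt.update(0, weight, grad, null)         // delegates to _LIB.mxOptimizerUpdate
opt.dispose()                             // free the native handle; do not reuse afterwards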