[Scala] Add EvalMetrics TopK and F1, and optimizers NAG, SGLD, ccSGD (#3149)
* scalapkg: add TopK and F1 EvalMetric
* scalapkg: add optimizers NAG, SGLD, ccSGD
Showing 8 changed files with 430 additions and 1 deletion.
91 changes: 91 additions & 0 deletions
scala-package/core/src/main/scala/ml/dmlc/mxnet/optimizer/NAG.scala
@@ -0,0 +1,91 @@
package ml.dmlc.mxnet.optimizer

import ml.dmlc.mxnet.{Optimizer, LRScheduler, NDArray}
import ml.dmlc.mxnet.NDArrayConversions._

/**
 * SGD with Nesterov momentum.
 * Implemented according to
 * https://github.com/torch/optim/blob/master/sgd.lua
 *
 * @author Depeng Liang
 *
 * @param learningRate Float, step size.
 * @param momentum Float, momentum value.
 * @param wd Float, L2 regularization coefficient added to all the weights.
 * @param clipGradient Float, clip gradient in range [-clipGradient, clipGradient].
 * @param lrScheduler The learning rate scheduler.
 */
class NAG(val learningRate: Float = 0.01f, val momentum: Float = 0.0f,
          val wd: Float = 0.0001f, val clipGradient: Float = 0f,
          val lrScheduler: LRScheduler = null) extends Optimizer {

  if (lrScheduler != null) {
    lrScheduler.baseLR = learningRate
  }

  /**
   * Update the parameters.
   * @param index A unique integer key used to index the parameters.
   * @param weight weight NDArray.
   * @param grad gradient NDArray.
   * @param state NDArray or other object returned by initState,
   *              the auxiliary state used in optimization.
   */
  override def update(index: Int, weight: NDArray, grad: NDArray, state: AnyRef): Unit = {
    // TODO(bing) implement wd_bias, wd_gamma, wd_beta (copy from python package)
    val lr =
      (if (lrScheduler != null) {
        val scheduledLr = lrScheduler(numUpdate)
        updateCount(index)
        scheduledLr
      } else {
        this.learningRate
      }) * lrScale.getOrElse(index, 1f)

    val wd = getWd(index, this.wd)
    var resdGrad = grad * this.rescaleGrad
    if (clipGradient != 0f) {
      // dispose the intermediate NDArray to avoid a memory leak
      val oldResdGrad = resdGrad
      resdGrad = NDArray.clip(resdGrad, -clipGradient, clipGradient)
      oldResdGrad.dispose()
    }

    if (state != null) {
      val mom = state.asInstanceOf[NDArray]
      mom *= momentum
      resdGrad += wd * weight
      mom += resdGrad
      resdGrad += momentum * mom
      weight += -lr * resdGrad
    } else {
      require(momentum == 0f)
      // adder = -lr * (resdGrad + this.wd * weight)
      // written this way to avoid a memory leak
      val adder = this.wd * weight
      adder += resdGrad
      adder *= (-lr)
      weight += adder
      adder.dispose()
    }

    resdGrad.dispose()
  }

  // Create additional optimizer state such as momentum.
  override def createState(index: Int, weight: NDArray): AnyRef = {
    if (momentum == 0.0f) {
      null
    } else {
      NDArray.zeros(weight.shape, weight.context)
    }
  }

  // Dispose the state it created
  override def disposeState(state: AnyRef): Unit = {
    if (state != null) {
      state.asInstanceOf[NDArray].dispose()
    }
  }
}
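A usage sketch for the updater above, not taken from the diff: the shapes and hyperparameters are hypothetical, and it assumes the Scala package's usual NDArray/Shape factories.

import ml.dmlc.mxnet._
import ml.dmlc.mxnet.optimizer.NAG

// Drive a single manual update step on a hypothetical 2x2 parameter.
val opt = new NAG(learningRate = 0.1f, momentum = 0.9f, wd = 0.0001f)
val weight = NDArray.ones(Shape(2, 2))
val grad = NDArray.ones(Shape(2, 2)) * 0.5f
val state = opt.createState(0, weight)  // momentum buffer (null when momentum == 0)
opt.update(0, weight, grad, state)
opt.disposeState(state)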
70 changes: 70 additions & 0 deletions
scala-package/core/src/main/scala/ml/dmlc/mxnet/optimizer/SGLD.scala
@@ -0,0 +1,70 @@
package ml.dmlc.mxnet.optimizer

import ml.dmlc.mxnet.{Optimizer, LRScheduler, NDArray}
import ml.dmlc.mxnet.NDArrayConversions._
import ml.dmlc.mxnet.Random

/**
 * Stochastic Gradient Langevin Dynamics (SGLD) updater to sample from a distribution.
 *
 * @author Depeng Liang
 *
 * @param learningRate Float, step size.
 * @param rescaleGradient Float, rescaling factor of gradient.
 * @param wd Float, L2 regularization coefficient added to all the weights.
 * @param clipGradient Float, clip gradient in range [-clipGradient, clipGradient].
 * @param lrScheduler The learning rate scheduler.
 */
class SGLD(val learningRate: Float = 0.01f, val rescaleGradient: Float = 1.0f,
           val wd: Float = 0.0001f, val clipGradient: Float = 0f,
           val lrScheduler: LRScheduler = null) extends Optimizer {

  if (lrScheduler != null) {
    lrScheduler.baseLR = learningRate
  }

  /**
   * Update the parameters.
   * @param index A unique integer key used to index the parameters.
   * @param weight weight NDArray.
   * @param grad gradient NDArray.
   * @param state NDArray or other object returned by initState,
   *              the auxiliary state used in optimization.
   */
  override def update(index: Int, weight: NDArray, grad: NDArray, state: AnyRef): Unit = {
    val lr =
      (if (lrScheduler != null) {
        val scheduledLr = lrScheduler(numUpdate)
        updateCount(index)
        scheduledLr
      } else {
        this.learningRate
      }) * lrScale.getOrElse(index, 1f)

    val wd = getWd(index, this.wd)
    var resdGrad = grad * this.rescaleGrad
    if (clipGradient != 0f) {
      // dispose the intermediate NDArray to avoid a memory leak
      val oldResdGrad = resdGrad
      resdGrad = NDArray.clip(resdGrad, -clipGradient, clipGradient)
      oldResdGrad.dispose()
    }

    val adder = this.wd * weight
    adder += resdGrad
    adder *= -(lr / 2)
    val norm = Random.normal(0f, Math.sqrt(lr).toFloat, weight.shape, weight.context)
    adder += norm
    weight += adder
    adder.dispose()
    norm.dispose()
  }

  // Create additional optimizer state such as momentum.
  override def createState(index: Int, weight: NDArray): AnyRef = {
    null
  }

  // Dispose the state it created
  override def disposeState(state: AnyRef): Unit = {}
}
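The update above amounts to weight <- weight - (lr/2) * (rescaled grad + wd * weight) + noise, where the noise is Gaussian with variance lr. A hypothetical driver loop (illustrative shapes and values, not from the diff); repeated updates yield approximate posterior samples of the weights:

import ml.dmlc.mxnet._
import ml.dmlc.mxnet.optimizer.SGLD

val opt = new SGLD(learningRate = 0.001f, wd = 0.00001f)
val weight = NDArray.ones(Shape(4))         // hypothetical parameter vector
for (t <- 0 until 10) {
  val grad = NDArray.ones(Shape(4)) * 0.1f  // hypothetical gradient at step t
  opt.update(0, weight, grad, null)         // SGLD keeps no per-weight state
  grad.dispose()
}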
76 changes: 76 additions & 0 deletions
scala-package/core/src/main/scala/ml/dmlc/mxnet/optimizer/ccSGD.scala
@@ -0,0 +1,76 @@
package ml.dmlc.mxnet.optimizer

import ml.dmlc.mxnet.{Optimizer, LRScheduler, NDArray}
import ml.dmlc.mxnet.NDArrayConversions._
import ml.dmlc.mxnet.Base._

/**
 * A very simple SGD optimizer with momentum and weight regularization.
 * Implemented in C++.
 *
 * @author Depeng Liang
 *
 * @param learningRate Float, step size.
 * @param momentum Float, momentum value.
 * @param rescaleGradient Float, rescaling factor of gradient.
 * @param wd Float, L2 regularization coefficient added to all the weights.
 * @param clipGradient Float, clip gradient in range [-clipGradient, clipGradient].
 * @param lrScheduler The learning rate scheduler.
 */
class ccSGD(val learningRate: Float = 0.01f, val momentum: Float = 0.0f,
            val wd: Float = 0.0001f, val rescaleGradient: Float = 1.0f,
            val clipGradient: Float = -1f, val lrScheduler: LRScheduler = null
           ) extends Optimizer {

  if (lrScheduler != null) {
    lrScheduler.baseLR = learningRate
  }

  private val optCreator = new OptimizerCreatorRef
  private val optHandle = new OptimizerHandleRef

  checkCall(_LIB.mxOptimizerFindCreator("ccsgd", optCreator))
  private val paramKeys = Array("momentum", "rescale_grad", "clip_gradient")
  private val paramvals = Array(s"$momentum", s"$rescaleGradient", s"$clipGradient")
  checkCall(_LIB.mxOptimizerCreateOptimizer(
    optCreator.value, paramKeys.length, paramKeys, paramvals, optHandle))

  /**
   * Update the parameters.
   * @param index A unique integer key used to index the parameters.
   * @param weight weight NDArray.
   * @param grad gradient NDArray.
   * @param state NDArray or other object returned by initState,
   *              the auxiliary state used in optimization.
   */
  override def update(index: Int, weight: NDArray, grad: NDArray, state: AnyRef): Unit = {
    val lr =
      (if (lrScheduler != null) {
        val scheduledLr = lrScheduler(numUpdate)
        updateCount(index)
        scheduledLr
      } else {
        this.learningRate
      }) * lrScale.getOrElse(index, 1f)

    val wd = getWd(index, this.wd)
    checkCall(_LIB.mxOptimizerUpdate(optHandle.value, index, weight.handle, grad.handle, lr, wd))
  }

  // Create additional optimizer state such as momentum.
  override def createState(index: Int, weight: NDArray): AnyRef = {
    null
  }

  // Dispose the state it created
  override def disposeState(state: AnyRef): Unit = {}

  /**
   * Free the optimizer handle.
   * The object shall never be used after it is disposed.
   */
  def dispose(): Unit = {
    checkCall(_LIB.mxOptimizerFree(optHandle.value))
  }
}
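A construction and teardown sketch with hypothetical values (not from the diff); the arithmetic itself runs in native code through the "ccsgd" handle created above, so the snippet assumes the native MXNet library is loaded as usual by the Scala package.

import ml.dmlc.mxnet._
import ml.dmlc.mxnet.optimizer.ccSGD

val opt = new ccSGD(learningRate = 0.1f, momentum = 0.9f, clipGradient = 10f)
val weight = NDArray.ones(Shape(3))       // hypothetical parameter
val grad = NDArray.ones(Shape(3)) * 0.2f  // hypothetical gradient
opt.update(0, weight, grad, null)         // delegates to _LIB.mxOptimizerUpdate
opt.dispose()                             // free the native handle; do not reuse afterwards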