Merged
2 changes: 1 addition & 1 deletion android-DL4JIrisClassifierDemo.md
@@ -189,7 +189,7 @@ The next step is to build the neural network using *nncBuilder*. The parameters
NeuralNetConfiguration.Builder nncBuilder = new NeuralNetConfiguration.Builder();
long seed = 6;
nncBuilder.seed(seed);
nncBuilder.learningRate(0.1);
nncBuilder.updater(new Sgd(0.1));
nncBuilder.activation(Activation.TANH);
nncBuilder.weightInit(WeightInit.XAVIER);
nncBuilder.regularization(true).l2(1e-4);
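For readers following this migration, the pattern in the hunk above generalizes to any updater, since the learning rate now travels inside the updater object rather than sitting on the builder. A minimal sketch (updater classes from the `org.nd4j.linalg.learning.config` package; the 0.1 rates are illustrative only):

```java
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.learning.config.Sgd;

NeuralNetConfiguration.Builder nncBuilder = new NeuralNetConfiguration.Builder();

// Old API: nncBuilder.learningRate(0.1);  (rate set on the builder, updater chosen separately)
// New API: the learning rate is a constructor argument of the chosen updater
nncBuilder.updater(new Sgd(0.1));               // plain SGD, step size 0.1
// nncBuilder.updater(new Nesterovs(0.1, 0.9)); // SGD with Nesterov momentum 0.9
// nncBuilder.updater(new Adam(0.1));           // Adam with initial learning rate 0.1
```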
5 changes: 2 additions & 3 deletions building-neural-net-with-dl4j.md
@@ -30,7 +30,7 @@ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.seed(seed)
.activation("tanh")
.weightInit(WeightInit.XAVIER)
.learningRate(0.1)
.updater(new Sgd(0.1))
.regularization(true).l2(1e-4)
.list()
.layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(3)
@@ -61,8 +61,7 @@ Here is a configuration of a recurrent neural network taken from our examples.
```
ComputationGraphConfiguration configuration = new NeuralNetConfiguration.Builder()
.weightInit(WeightInit.XAVIER)
.learningRate(0.5)
.updater(Updater.RMSPROP)
.updater(new RmsProp(0.5))
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.seed(seed)
.graphBuilder()
6 changes: 3 additions & 3 deletions compgraph.md
@@ -96,7 +96,7 @@ For the sake of this example, let's assume our input data is of size 5. Our confi

```java
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
.learningRate(0.01)
.updater(new Sgd(0.01))
.graphBuilder()
.addInputs("input") //can use any label for this
.addLayer("L1", new GravesLSTM.Builder().nIn(5).nOut(5).build(), "input")
@@ -123,7 +123,7 @@ To build the above network, we use the following configuration:

```java
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
.learningRate(0.01)
.updater(new Sgd(0.01))
.graphBuilder()
.addInputs("input1", "input2")
.addLayer("L1", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input1")
@@ -145,7 +145,7 @@ In this case, the network configuration is:

```java
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
.learningRate(0.01)
.updater(new Sgd(0.01))
.graphBuilder()
.addInputs("input")
.addLayer("L1", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input")
3 changes: 1 addition & 2 deletions core-concepts.md
@@ -68,8 +68,7 @@ DL4J gives data scientists and developers tools to build deep neural networks
MultiLayerConfiguration conf =
new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.updater(Updater.NESTEROVS).momentum(0.9)
.learningRate(learningRate)
.updater(new Nesterovs(learningRate, 0.9))
.list(
new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes).activation("relu").build(),
new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD).activation("softmax").nIn(numHiddenNodes).nOut(numOutputs).build()
4 changes: 2 additions & 2 deletions dbn-iris-tutorial.md
@@ -119,13 +119,13 @@ With DL4J, creating a neural network of any kind involves several steps.
First, we need to create a configuration object:

NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.hiddenUnit(RBM.HiddenUnit.RECTIFIED).momentum(5e-1f)
.hiddenUnit(RBM.HiddenUnit.RECTIFIED)
.visibleUnit(RBM.VisibleUnit.GAUSSIAN).regularization(true)
.regularizationCoefficient(2e-4f).dist(Distributions.uniform(gen))
.activationFunction(Activations.tanh())
.weightInit(WeightInit.DISTRIBUTION)
.lossFunction(LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY).rng(gen)
.learningRate(1e-3f).nIn(4).nOut(3).build();
.updater(new Nesterovs(1e-3f,0.5)).nIn(4).nOut(3).build();

This has everything that our DBN classifier will need. As you can see, there are a lot of parameters, or ‘knobs’, that you will learn to adjust over time to improve your nets’ performance. These are the pedals, clutch and steering wheel attached to DL4J's deep-learning engine.

3 changes: 1 addition & 2 deletions deep-learning-and-the-game-of-go.md
@@ -171,10 +171,9 @@ With data processed and ready to go, you can now turn to building the actual mod

MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.seed(randomSeed)
.learningRate(.1)
.updater(new AdaGrad(0.1))
.weightInit(WeightInit.XAVIER)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.updater(Updater.ADAGRAD)
.list()
.layer(0, new ConvolutionLayer.Builder(3, 3)
.nIn(featurePlanes).stride(1, 1).nOut(50).activation(Activation.RELU).build())
8 changes: 4 additions & 4 deletions deprecated/XXiris-flower-dataset-tutorial.md
@@ -125,7 +125,7 @@ public class DBNIrisExample {
log.info("Build model....");
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.seed(seed) // Locks in weight initialization for tuning
.learningRate(1e-6f) // Optimization step size
.updater(new Sgd(1e-6f)) // Optimization step size
.optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) // Backprop to calculate gradients
.l1(1e-1).regularization(true).l2(2e-4)
.useDropConnect(true)
@@ -216,10 +216,10 @@ A *NeuralNetConfiguration* object is the fundamental object used to construct La
^ This line uses a specific, randomly generated weight initialization. If you run an example many times, and generate new, random weights each time, then your net's F1 score may vary a great deal, because different initial weights can lead algorithms to different local minima of the errorscape. Keeping the weights the same allows you to see the effect of adjusting other hyperparameters more clearly. `seed` is a variable specified before we configure the model.

``` java
.learningRate(1e-6f)
.updater(new Sgd(1e-6f))
```

^ This line sets the learning rate, which is the size of the adjustments made to the weights with each iteration. A high learning rate makes a net traverse the errorscape quickly, but makes it prone to overshoot the minima. A low learning rate is more likely to find the minimum, but it will do so very slowly.
^ This line sets the learning rate and specifies an updater to use. The learning rate is the size of the adjustments made to the weights with each iteration. A high learning rate makes a net traverse the errorscape quickly, but makes it prone to overshoot the minima. A low learning rate is more likely to find the minimum, but it will do so very slowly.

``` java
.optimizationAlgo(OptimizationAlgorithm.LBFGS)
@@ -228,7 +228,7 @@ A *NeuralNetConfiguration* object is the fundamental object used to construct La
^ This line specifies your optimization algorithm as Limited-memory BFGS, a backpropagation method that helps calculate gradients.

``` java
.l2(2e-4).regularization(true).momentum(0.9).constrainGradientToUnitNorm(true)
.l2(2e-4).regularization(true).updater(new Nesterovs(0.001,0.9)).constrainGradientToUnitNorm(true)
```

^ This line sets several parameters:
65 changes: 23 additions & 42 deletions doc_templates/template_doc_model.md
@@ -39,12 +39,9 @@ LearningRatePolicy provides decay alternatives during training. [[Explain what d
double lr = 1e-2;
double decayRate = 2;
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.learningRate(lr)
.learningRateDecayPolicy(LearningRatePolicy.Exponential)
.lrPolicyDecayRate(decayRate)
.layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
.updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
.build();
.updater(new Sgd(new ExponentialSchedule(ScheduleType.ITERATION, lr, decayRate)))
.layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build())
.build();
```

<b>Inverse</b>
@@ -54,35 +51,30 @@ NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
double lr = 1e-2;
double decayRate = 2;
double power = 3;
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
.learningRateDecayPolicy(LearningRatePolicy.Inverse)
.lrPolicyDecayRate(decayRate).lrPolicyPower(power)
.layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
.updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
.build();
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.updater(new Sgd(new InverseSchedule(ScheduleType.ITERATION,lr,decayRate,power)))
.layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build())
.build();
```

<b>Poly</b>

[[Brief description]]
```
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
.learningRateDecayPolicy(LearningRatePolicy.Poly).lrPolicyPower(power)
.layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
.updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
.build();
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.updater(new Sgd(new PolySchedule(ScheduleType.ITERATION,lr,power,maxIter)))
.layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build())
.build();
```

<b>Sigmoid</b>

[[Brief description]]
```
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
.learningRateDecayPolicy(LearningRatePolicy.Sigmoid)
.lrPolicyDecayRate(decayRate).lrPolicySteps(steps)
.layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
.updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
.build();
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.updater(new Sgd(new SigmoidSchedule(ScheduleType.ITERATION,lr,decayRate,steps)))
.layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build())
.build();
```

<b>Step</b>
@@ -92,33 +84,22 @@ NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(
double lr = 1e-2;
double decayRate = 2;
double steps = 3;
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
.learningRateDecayPolicy(LearningRatePolicy.Step).lrPolicyDecayRate(decayRate)
.lrPolicySteps(steps)
.layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
.updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
.build();
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.updater(new Sgd(new StepSchedule(ScheduleType.ITERATION,lr,decayRate,steps)))
.layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build())
.build();
```

<b>Schedule</b>

Allows you to specify a schedule [[explain]].
```
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.learningRateDecayPolicy(LearningRatePolicy.Schedule)
.learningRateSchedule(learningRateSchedule)
```

<b>Score</b>

[[Brief description]]
```
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.learningRate(lr)
.learningRateDecayPolicy(LearningRatePolicy.Score).lrPolicyDecayRate(lrScoreDecay).list()
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.updater(new Sgd(new MapSchedule(ScheduleType.ITERATION,learningRateMapSchedule)))
.layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build())
.build();
```


## <a name="setup">How It Works</a>

<b><a href="#prereqs">0. Prerequisites</a></b>
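The `learningRateMapSchedule` variable referenced in the last hunk above is assumed to be defined elsewhere in the template. A minimal sketch of building one with `MapSchedule` from `org.nd4j.linalg.schedule` (the iteration/rate values are illustrative; an entry for iteration 0 is required):

```java
import java.util.HashMap;
import java.util.Map;
import org.nd4j.linalg.schedule.ISchedule;
import org.nd4j.linalg.schedule.MapSchedule;
import org.nd4j.linalg.schedule.ScheduleType;

// Keys are iteration counts, values are learning rates; iteration 0 must be present
Map<Integer, Double> learningRateMapSchedule = new HashMap<>();
learningRateMapSchedule.put(0, 1e-2);      // start at 0.01
learningRateMapSchedule.put(1000, 1e-3);   // drop to 0.001 at iteration 1,000
learningRateMapSchedule.put(5000, 1e-4);   // drop to 0.0001 at iteration 5,000

ISchedule schedule = new MapSchedule(ScheduleType.ITERATION, learningRateMapSchedule);
// The schedule is then wrapped in an updater, e.g. new Sgd(schedule)
```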
3 changes: 1 addition & 2 deletions jcg.md
@@ -75,8 +75,7 @@ Now that the data is ready, we can set up the configuration of the neural networ
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.seed(seed)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.learningRate(learningRate)
.updater(Updater.NESTEROVS)
.updater(new Nesterovs(learningRate))
.list()
.layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes)
.weightInit(WeightInit.XAVIER)
9 changes: 3 additions & 6 deletions mnist-for-beginners.md
@@ -68,19 +68,16 @@ layout: default
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.seed(rngSeed)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.learningRate(0.006)
.updater(Updater.NESTEROVS).momentum(0.9)
.updater(new Nesterovs(0.006,0.9))
.regularization(true).l2(1e-4)
.list()
</code></pre>
<h5>.seed(rngSeed)</h5>
<p>This parameter uses a specific, randomly generated weight initialization. If you run an example many times, and generate new, random weights each time you begin, then your net’s results -- accuracy and F1 score -- may vary a great deal, because different initial weights can lead algorithms to different local minima in the errorscape. Keeping the same random weights allows you to isolate the effect of adjusting other hyperparameters more clearly, while other conditions remain equal.</p>
<h5>.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)</h5>
<p>Stochastic gradient descent (SGD) is a common method to optimize the cost function. To learn more about SGD and other optimization algorithms that help minimize error, we recommend <a href="https://www.coursera.org/learn/machine-learning" target="_blank">Andrew Ng’s Machine Learning course</a> and the SGD definition in our <a href="http://deeplearning4j.org/glossary#stochasticgradientdescent" target="_blank">glossary</a>.</p>
<h5>.learningRate(0.006)</h5>
<p>This line sets the learning rate, which is the size of the adjustments made to the weights with each iteration, the step size. A high learning rate makes a net traverse the errorscape quickly, but also makes it prone to overshoot the point of minimum error. A low learning rate is more likely to find the minimum, but it will do so very slowly, because it is taking small steps in adjusting the weights.</p>
<h5>.updater(Updater.NESTEROVS).momentum(0.9)</h5>
<p>Momentum is an additional factor in determining how fast an optimization algorithm converges on the optimum point. Momentum affects the direction that weights are adjusted in, so in the code we consider it a weight <code>updater</code>.</p>
<h5>.updater(new Nesterovs(0.006,0.9))</h5>
<p>This line sets the updater to Nesterovs and specifies both the learning rate and the momentum. The learning rate is the size of the adjustments made to the weights with each iteration, that is, the step size. A high learning rate makes a net traverse the errorscape quickly, but also makes it prone to overshoot the point of minimum error. A low learning rate is more likely to find the minimum, but it will do so very slowly, because it is taking small steps in adjusting the weights. Momentum is an additional factor in determining how fast an optimization algorithm converges on the optimum point. Momentum affects the direction in which weights are adjusted, so in the code we consider it part of the weight <code>updater</code>.</p>
<h5>.regularization(true).l2(1e-4)</h5>
<p>Regularization is a technique to prevent what’s called <b>overfitting</b>. Overfitting is when the model fits the training data really well, but performs poorly in real life as soon as it's exposed to data it hasn’t seen before.</p>
<p>We use L2 regularization, which prevents individual weights from having too much influence on the overall results.</p>
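As a sketch of what the `l2(1e-4)` coefficient in the hunk above contributes, L2 regularization adds a penalty proportional to the squared weights to the loss being minimized (standard formulation, where λ is the coefficient and w the network weights):

$$L_{total} = L_{data} + \lambda \sum_i w_i^2$$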
3 changes: 1 addition & 2 deletions multilayerperceptron.md
@@ -64,8 +64,7 @@ Eclipse Deeplearning4j includes [several examples of multilayer perceptrons](htt
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.seed(seed)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.learningRate(learningRate)
.updater(Updater.NESTEROVS) //To configure: .updater(new Nesterovs(0.9))
.updater(new Nesterovs(learningRate,0.9))
.list()
.layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes)
.weightInit(WeightInit.XAVIER)
6 changes: 3 additions & 3 deletions programmingguide/03_code_structure.md
@@ -153,7 +153,7 @@ The `MultiLayerConfiguration` can be used to define the parameters and structure
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.activation("relu")
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.learningRate(0.05)
.updater(new Sgd(0.05))
// ... other hyperparameters
.list()
.backprop(true)
@@ -189,11 +189,11 @@ To create a `ComputationGraphConfiguration`, use the following line:

ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder()

Note that the code is slightly different for configuring a `ComputationGraph` rather than a `MultiLayerNetwork`. However, similar to before, `ComputationGraphConfiguration` can be used the define neural network parameters such as the learning rate, optimization algorithm, and network structure. For example, the following code chunk configures a `ComputationGraph` using stochastic gradient descent and a learning rate of 0.1. It also defines the network structure to include one hidden layer with 500 nodes and an output layer which outputs a probability.
Note that the code is slightly different for configuring a `ComputationGraph` rather than a `MultiLayerNetwork`. However, similar to before, `ComputationGraphConfiguration` can be used to define neural network parameters such as the learning rate, optimization algorithm, and network structure. For example, the following code chunk configures a `ComputationGraph` using stochastic gradient descent with a learning rate of 0.1, set via the `Sgd` updater. It also defines the network structure to include one hidden layer with 500 nodes and an output layer which outputs a probability.

ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.learningRate(0.1)
.updater(new Sgd(0.1))
// other parameters
.graphBuilder()
.setInputTypes(InputType.feedForward(100))
2 changes: 1 addition & 1 deletion programmingguide/04_convnet.md
@@ -109,7 +109,7 @@ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.seed(seed)
.regularization(false).l2(0.005)
.activation(Activation.RELU)
.learningRate(0.0001)
.updater(new Sgd(0.0001))
.weightInit(WeightInit.XAVIER)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.updater(new Nesterovs(0.9))
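Note that after this one-line change the convnet configuration still contains a second `.updater(new Nesterovs(0.9))` call a few lines below; with the fluent builder the later call takes precedence, so the 0.0001 rate would effectively be ignored. A hedged sketch of a single-updater form, assuming the intent is to keep both the 0.0001 learning rate and the 0.9 momentum:

```java
// One updater carrying both the learning rate and the Nesterov momentum
.updater(new Nesterovs(0.0001, 0.9))
```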
2 changes: 1 addition & 1 deletion programmingguide/05_lstm.md
@@ -64,7 +64,7 @@ public static final int NB_INPUTS = 86;

ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.learningRate(LEARNING_RATE)
.updater(new Sgd(LEARNING_RATE))
.graphBuilder()
.addInputs("trainFeatures")
.setOutputs("predictMortality")
3 changes: 1 addition & 2 deletions programmingguide/06_feedforwardnet.md
@@ -51,8 +51,7 @@ Now that the data is ready, we can set up the configuration of the neural networ
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.seed(seed)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.learningRate(learningRate)
.updater(Updater.NESTEROVS)
.updater(new Nesterovs(learningRate))
.list()
.layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes)
.weightInit(WeightInit.XAVIER)