
New functionality (epoch tracking and constraints) #3957

Merged
merged 9 commits on Aug 31, 2017
@@ -299,6 +299,11 @@ public void setParam(String key, INDArray val) {
    @Override
    public void clear() {}

    @Override
    public void applyConstraints(int iteration, int epoch) {
        //No op
    }

    /* compute the gradient given the current solution, the probabilities and the constant */
    protected Pair<Double, INDArray> gradient(INDArray p) {
        throw new UnsupportedOperationException();
@@ -0,0 +1,102 @@
package org.deeplearning4j.nn.conf.constraints;

import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.constraint.MaxNormConstraint;
import org.deeplearning4j.nn.conf.constraint.MinMaxNormConstraint;
import org.deeplearning4j.nn.conf.constraint.NonNegativeConstraint;
import org.deeplearning4j.nn.conf.constraint.UnitNormConstraint;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.util.ModelSerializer;
import org.junit.Test;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.Collections;
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

public class TestConstraints {

    @Test
    public void testConstraints() throws Exception {

        LayerConstraint[] constraints = new LayerConstraint[]{
                new MaxNormConstraint(0.5, 1),
                new MinMaxNormConstraint(0.3, 0.4, 1),
                new NonNegativeConstraint(),
                new UnitNormConstraint(1)
        };

        for (LayerConstraint lc : constraints) {

            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .constraints(lc)
                    .learningRate(0.0)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 5))
                    .list()
                    .layer(new DenseLayer.Builder().nIn(12).nOut(10).build())
                    .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build())
                    .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            List<LayerConstraint> exp = Collections.singletonList(lc.clone());
            assertEquals(exp, net.getLayer(0).conf().getLayer().getConstraints());
            assertEquals(exp, net.getLayer(1).conf().getLayer().getConstraints());

            INDArray input = Nd4j.rand(3, 12);
            INDArray labels = Nd4j.rand(3, 8);

            net.fit(input, labels);

            INDArray w0 = net.getParam("0_W");
            INDArray b0 = net.getParam("0_b");
            INDArray w1 = net.getParam("1_W");
            INDArray b1 = net.getParam("1_b");

            if (lc instanceof MaxNormConstraint) {
                assertTrue(w0.norm2(1).maxNumber().doubleValue() <= 0.5);
                assertTrue(w1.norm2(1).maxNumber().doubleValue() <= 0.5);
            } else if (lc instanceof MinMaxNormConstraint) {
                assertTrue(w0.norm2(1).minNumber().doubleValue() >= 0.3);
                assertTrue(w0.norm2(1).maxNumber().doubleValue() <= 0.4);
                assertTrue(w1.norm2(1).minNumber().doubleValue() >= 0.3);
                assertTrue(w1.norm2(1).maxNumber().doubleValue() <= 0.4);
            } else if (lc instanceof NonNegativeConstraint) {
                assertTrue(w0.minNumber().doubleValue() >= 0.0);
            } else if (lc instanceof UnitNormConstraint) {
                assertEquals(w0.norm2(1).minNumber().doubleValue(), 1.0, 1e-6);
                assertEquals(w0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
                assertEquals(w1.norm2(1).minNumber().doubleValue(), 1.0, 1e-6);
                assertEquals(w1.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
            }

            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            ModelSerializer.writeModel(net, baos, true);
            byte[] bytes = baos.toByteArray();

            ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
            MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);

            assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations());
            assertEquals(net.params(), restored.params());
        }
    }
}
@@ -1079,6 +1079,42 @@ public void testVertexAsOutput(){
        assertNotNull(out[0]);

        assertArrayEquals(new int[]{minibatch, 1, 36, 48}, out[0].shape());
    }

    @Test
    public void testEpochCounter() throws Exception {

        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .graphBuilder()
                .addInputs("in")
                .addLayer("out", new OutputLayer.Builder().nIn(4).nOut(3).build(), "in")
                .setOutputs("out")
                .build();

        ComputationGraph net = new ComputationGraph(conf);
        net.init();

        assertEquals(0, net.getConfiguration().getEpochCount());

        DataSetIterator iter = new IrisDataSetIterator(150, 150);

        for (int i = 0; i < 4; i++) {
            assertEquals(i, net.getConfiguration().getEpochCount());
            net.fit(iter);
            assertEquals(i + 1, net.getConfiguration().getEpochCount());
        }

        assertEquals(4, net.getConfiguration().getEpochCount());

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ModelSerializer.writeModel(net, baos, true);
        byte[] bytes = baos.toByteArray();

        ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
        ComputationGraph restored = ModelSerializer.restoreComputationGraph(bais, true);
        assertEquals(4, restored.getConfiguration().getEpochCount());
    }
}
@@ -64,6 +64,7 @@
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.*;

import static org.junit.Assert.*;
@@ -1151,4 +1152,40 @@ public void testCompareLayerMethods(){

        assertEquals(conf1, conf2);
    }


    @Test
    public void testEpochCounter() throws Exception {

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .list()
                .layer(new OutputLayer.Builder().nIn(4).nOut(3).build())
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        assertEquals(0, net.getLayerWiseConfigurations().getEpochCount());

        DataSetIterator iter = new IrisDataSetIterator(150, 150);

        for (int i = 0; i < 4; i++) {
            assertEquals(i, net.getLayerWiseConfigurations().getEpochCount());
            net.fit(iter);
            assertEquals(i + 1, net.getLayerWiseConfigurations().getEpochCount());
        }

        assertEquals(4, net.getLayerWiseConfigurations().getEpochCount());

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ModelSerializer.writeModel(net, baos, true);
        byte[] bytes = baos.toByteArray();

        ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
        MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
        assertEquals(4, restored.getLayerWiseConfigurations().getEpochCount());
    }
}
@@ -1088,5 +1088,10 @@ public Pair<INDArray, MaskState> feedForwardMaskArray(INDArray maskArray, MaskSt
        public INDArray getGradientsViewArray() {
            return gradientView;
        }

        @Override
        public void applyConstraints(int iteration, int epoch) {
            //No op
        }
    }
}
@@ -264,4 +264,10 @@ public interface Model {
     * Clear input
     */
    void clear();

    /**
     * Apply any constraints to the model's parameters, given the current iteration and epoch counts
     */
    void applyConstraints(int iteration, int epoch);
}
@@ -34,6 +34,10 @@ public interface ParamInitializer {

    int numParams(org.deeplearning4j.nn.conf.layers.Layer layer);

    /**
     * Is the specified parameter key a weight parameter?
     */
    boolean isWeightParam(String key);

    /**
     * Is the specified parameter key a bias parameter?
     */
    boolean isBiasParam(String key);

    /**
     * Initialize the parameters
     *
@@ -0,0 +1,15 @@
package org.deeplearning4j.nn.api.layers;

import org.deeplearning4j.nn.api.Layer;
import org.nd4j.shade.jackson.annotation.JsonTypeInfo;

import java.io.Serializable;

@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class")
public interface LayerConstraint extends Cloneable, Serializable {

    void applyConstraint(Layer layer, int iteration, int epoch);

I see where the call would come in the optimizer, but again it seems dangerous that this could be applied less than once (or more than once, depending on the constraint) per pass. Now that we're tracking epochs, could we add some epoch checking to make sure two successive calls are idempotent, and log a warning if the constraint is applied less than once per epoch?

Contributor Author
The way I'll be implementing it is that it'll get called once per iteration, after updates have been applied. It won't be possible to apply it more than once per iteration (and even if it were, all of the implementations give the same result with multiple sequential applications).
The epoch (and iteration) number is just informational, to allow flexibility for users to have behaviour that depends on the number of epochs passed.


    LayerConstraint clone();

}
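To illustrate the once-per-iteration, idempotent behaviour discussed in the comments above, here is a minimal sketch of a constraint implementation. It is illustrative only (the class name is hypothetical and not part of this PR); it assumes Layer exposes paramTable() returning a map of parameter name to INDArray, and uses ND4J's BooleanIndexing to clamp negative entries in place.

import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.indexing.BooleanIndexing;
import org.nd4j.linalg.indexing.conditions.Conditions;

public class NonNegativeSketch implements LayerConstraint {

    @Override
    public void applyConstraint(Layer layer, int iteration, int epoch) {
        //Clamp all negative parameter entries to 0.0, in place.
        //Idempotent: applying this twice in a row gives the same result as applying it once.
        //The iteration/epoch arguments are informational and could drive epoch-dependent behaviour.
        for (INDArray param : layer.paramTable().values()) {
            BooleanIndexing.replaceWhere(param, 0.0, Conditions.lessThan(0.0));
        }
    }

    @Override
    public LayerConstraint clone() {
        return new NonNegativeSketch();
    }
}

Since clamping twice yields the same array as clamping once, an accidental re-application within an iteration would be harmless, as noted in the reply above.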
@@ -97,6 +97,9 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable {
    // for Spark and model serialization
    protected int iterationCount = 0;

    //Counter for the number of epochs completed so far. Used for per-epoch schedules
    protected int epochCount = 0;

    /**
     * @return JSON representation of configuration
@@ -79,6 +79,9 @@ public class MultiLayerConfiguration implements Serializable, Cloneable {
    // for Spark and model serialization
    protected int iterationCount = 0;

    //Counter for the number of epochs completed so far. Used for per-epoch schedules
    protected int epochCount = 0;

    /**
     *
     * @return JSON representation of NN configuration
@@ -25,6 +25,7 @@
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.ClassUtils;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.distribution.Distribution;
import org.deeplearning4j.nn.conf.graph.GraphVertex;
import org.deeplearning4j.nn.conf.inputs.InputType;
@@ -120,6 +121,9 @@ public class NeuralNetConfiguration implements Serializable, Cloneable {
    // for Spark and model serialization
    protected int iterationCount = 0;

    //Counter for the number of epochs completed so far. Used for per-epoch schedules
    protected int epochCount = 0;

    private static ObjectMapper mapper = initMapper();
    private static final ObjectMapper mapperYaml = initMapperYaml();
    private static Set<Class<?>> subtypesClassCache = null;
@@ -631,6 +635,7 @@ public static class Builder implements Cloneable {
        protected double lrPolicySteps = Double.NaN;
        protected double lrPolicyPower = Double.NaN;
        protected boolean pretrain = false;
        protected List<LayerConstraint> constraints = null;

        protected WorkspaceMode trainingWorkspaceMode = WorkspaceMode.NONE;
        protected WorkspaceMode inferenceWorkspaceMode = WorkspaceMode.SEPARATE;
@@ -1222,11 +1227,39 @@ public Builder lrPolicyPower(double lrPolicyPower) {
            return this;
        }

        /**
         * Sets the convolution mode for convolutional layers, which impacts padding and output sizes.
         * See {@link ConvolutionMode} for details. Defaults to ConvolutionMode.Truncate
         * @param convolutionMode Convolution mode to use
         */
        public Builder convolutionMode(ConvolutionMode convolutionMode) {
            this.convolutionMode = convolutionMode;
            return this;
        }

        /**
         * Set constraints to be applied to all layers. Default: no constraints.<br>
         * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization,
         * etc). These constraints are applied at each iteration, after the parameters have been updated.
         *
         * @param constraints Constraints to apply to all layers
         */
        public Builder constraints(LayerConstraint... constraints) {
            return constraints(Arrays.asList(constraints));
        }

        /**
         * Set constraints to be applied to all layers. Default: no constraints.<br>
         * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization,
         * etc). These constraints are applied at each iteration, after the parameters have been updated; see the
         * usage sketch below this method.
         *
         * @param constraints Constraints to apply to all layers
         */
        public Builder constraints(List<LayerConstraint> constraints) {
            this.constraints = constraints;
            return this;
        }
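        // Usage sketch (illustrative only, not part of this diff): applying a max-norm constraint
        // to all layers via the new builder methods. Assumes the constraint classes added in this
        // PR, e.g. MaxNormConstraint(maxNorm, dimension); the layer sizes here are arbitrary.
        //
        //     MultiLayerConfiguration c = new NeuralNetConfiguration.Builder()
        //             .constraints(new MaxNormConstraint(0.5, 1))
        //             .list()
        //             .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
        //             .layer(new OutputLayer.Builder()
        //                     .lossFunction(LossFunctions.LossFunction.MSE)
        //                     .nIn(10).nOut(3).build())
        //             .build();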

        private void learningRateValidation(String layerName) {
            if (learningRatePolicy != LearningRatePolicy.None && Double.isNaN(lrPolicyDecayRate)) {
                //LR policy, if used, should have a decay rate. 2 exceptions: Map for schedule, and Poly + power param
@@ -1335,7 +1368,7 @@ private void configureLayer(Layer layer) {
                }
            }
            LayerValidation.generalValidation(layerName, layer, useDropConnect, dropOut, l2, l2Bias,
-                           l1, l1Bias, dist);
+                           l1, l1Bias, dist, constraints);
        }

        private void copyConfigToLayer(String layerName, Layer layer) {