From 4aaef0cb83d0da8270b049c2adf83bb347e11255 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Mon, 28 Aug 2017 20:01:03 +1000 Subject: [PATCH 1/9] Add epoch counter functionality + tests --- .../nn/graph/TestComputationGraphNetwork.java | 36 ++++++++++++++++++ .../nn/multilayer/MultiLayerTest.java | 37 +++++++++++++++++++ .../conf/ComputationGraphConfiguration.java | 3 ++ .../nn/conf/MultiLayerConfiguration.java | 3 ++ .../nn/graph/ComputationGraph.java | 16 ++++++++ .../nn/multilayer/MultiLayerNetwork.java | 16 ++++++++ 6 files changed, 111 insertions(+) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java index 459d2b12d886..e3cf42b3e5d4 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java @@ -1079,6 +1079,42 @@ public void testVertexAsOutput(){ assertNotNull(out[0]); assertArrayEquals(new int[]{minibatch, 1, 36, 48}, out[0].shape()); + } + + @Test + public void testEpochCounter() throws Exception { + + ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + .graphBuilder() + .addInputs("in") + .addLayer("out", new OutputLayer.Builder().nIn(4).nOut(3).build(), "in") + .setOutputs("out") + .build(); + + ComputationGraph net = new ComputationGraph(conf); + net.init(); + + assertEquals(0, net.getConfiguration().getEpochCount()); + + + DataSetIterator iter = new IrisDataSetIterator(150, 150); + + for( int i=0; i<4; i++ ){ + assertEquals(i, net.getConfiguration().getEpochCount()); + net.fit(iter); + assertEquals(i+1, net.getConfiguration().getEpochCount()); + } + + assertEquals(4, net.getConfiguration().getEpochCount()); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + ModelSerializer.writeModel(net, baos, true); + byte[] bytes = baos.toByteArray(); + + ByteArrayInputStream bais = new ByteArrayInputStream(bytes); + ComputationGraph restored = ModelSerializer.restoreComputationGraph(bais, true); + assertEquals(4, restored.getConfiguration().getEpochCount()); } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java index 5b993bf9a4bd..dbc719c7dcbe 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java @@ -64,6 +64,7 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.ObjectOutputStream; import java.util.*; import static org.junit.Assert.*; @@ -1151,4 +1152,40 @@ public void testCompareLayerMethods(){ assertEquals(conf1, conf2); } + + + @Test + public void testEpochCounter() throws Exception { + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new OutputLayer.Builder().nIn(4).nOut(3).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + assertEquals(0, net.getLayerWiseConfigurations().getEpochCount()); + + + DataSetIterator iter = new IrisDataSetIterator(150, 150); + + for( int i=0; i<4; i++ ){ + assertEquals(i, net.getLayerWiseConfigurations().getEpochCount()); + net.fit(iter); + assertEquals(i+1, 
net.getLayerWiseConfigurations().getEpochCount()); + } + + assertEquals(4, net.getLayerWiseConfigurations().getEpochCount()); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + ModelSerializer.writeModel(net, baos, true); + byte[] bytes = baos.toByteArray(); + + ByteArrayInputStream bais = new ByteArrayInputStream(bytes); + + MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); + assertEquals(4, restored.getLayerWiseConfigurations().getEpochCount()); + } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java index 3ac84b11e239..4864e8076917 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java @@ -97,6 +97,9 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { // for Spark and model serialization protected int iterationCount = 0; + //Counter for the number of epochs completed so far. Used for per-epoch schedules + protected int epochCount = 0; + /** * @return JSON representation of configuration diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/MultiLayerConfiguration.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/MultiLayerConfiguration.java index 9fa91e5e5071..fd055860e552 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/MultiLayerConfiguration.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/MultiLayerConfiguration.java @@ -79,6 +79,9 @@ public class MultiLayerConfiguration implements Serializable, Cloneable { // for Spark and model serialization protected int iterationCount = 0; + //Counter for the number of epochs completed so far. Used for per-epoch schedules + protected int epochCount = 0; + /** * * @return JSON representation of NN configuration diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java index 95aaaec0998d..aaadf3d3a5b2 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java @@ -962,6 +962,7 @@ public void fit(DataSetIterator iterator) { if (destructable) ((AsyncDataSetIterator) dataSetIterator).shutdown(); + incrementEpochCount(); } /** @@ -1060,6 +1061,7 @@ public void fit(MultiDataSetIterator multi) { if (destructable) ((AsyncMultiDataSetIterator) multiDataSetIterator).shutdown(); + incrementEpochCount(); } protected void migrate(MultiDataSet ds) { @@ -3182,6 +3184,20 @@ protected void clearLayersStates() { } } + /** + * Increment the epoch count (in the underlying {@link MultiLayerConfiguration} by 1). + * Note that this is done automatically when using iterator-based fitting methods, such as + * {@link #fit(DataSetIterator)} or {@link #fit(MultiDataSet)}. However, when using non-iterator fit methods + * (DataSet, MultiDataSet, INDArrays etc), the network has no way to know when one epoch ends and another starts. + * In such situations, this method can be used to increment the epoch counter.
+ * Note that the epoch counter is used by features such as epoch-based learning rate schedules. + * + * The current epoch count can be obtained using {@code ComputationGraph.getConfiguration().getEpochCount()} + */ + public void incrementEpochCount(){ + configuration.setEpochCount(configuration.getEpochCount() + 1); + } + /** * Indicates whether some other object is "equal to" this one. *

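For context, a minimal usage sketch (not part of the patch) of the counter on the ComputationGraph side. It assumes the incrementEpochCount() method and the configuration accessors exercised by the tests above; conf, numEpochs and trainingData are illustrative placeholder names.

    // Manual epoch tracking when fitting from individual DataSets (no iterator).
    // The fit(DataSetIterator) overload shown above increments the counter automatically.
    ComputationGraph net = new ComputationGraph(conf);      // 'conf' built elsewhere (assumed)
    net.init();
    for (int epoch = 0; epoch < numEpochs; epoch++) {       // 'numEpochs' is illustrative
        for (DataSet ds : trainingData) {                   // 'trainingData': any Iterable<DataSet>
            net.fit(ds);                                    // non-iterator fit: no automatic increment
        }
        net.incrementEpochCount();                          // method added in this patch
    }
    int completed = net.getConfiguration().getEpochCount();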
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java index 458f9707fe9b..7d4ac95d1bdd 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java @@ -1266,6 +1266,8 @@ public void fit(DataSetIterator iterator) { if (destructable) ((AsyncDataSetIterator) iter).shutdown(); + + incrementEpochCount(); } /** Calculate and set gradients for MultiLayerNetwork, based on OutputLayer and labels*/ @@ -3056,6 +3058,20 @@ protected void clearLayersStates() { } } + /** + * Increment the epoch count (in the underlying {@link MultiLayerConfiguration} by 1). + * Note that this is done automatically when using iterator-based fitting methods, such as + * {@link #fit(DataSetIterator)}. However, when using non-iterator fit methods (DataSet, INDArray/INDArray etc), + * the network has no way to know when one epoch ends and another starts. In such situations, this method + * can be used to increment the epoch counter.
+ * Note that the epoch counter is used by features such as epoch-based learning rate schedules. + * + * The current epoch count can be obtained using {@code MultiLayerNetwork.getLayerWiseConfigurations().getEpochCount()} + */ + public void incrementEpochCount(){ + layerWiseConfigurations.setEpochCount(layerWiseConfigurations.getEpochCount() + 1); + } + /** * Indicates whether some other object is "equal to" this one. *

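The MultiLayerNetwork API added above mirrors this. A small sketch of reading the counter after iterator-based fitting and after a save/load round trip, assuming the accessors used in the tests above; conf and iter are placeholders and the file name is illustrative.

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    for (int i = 0; i < 4; i++) {
        net.fit(iter);                                                        // each fit(DataSetIterator) call = one epoch
    }
    int epochs = net.getLayerWiseConfigurations().getEpochCount();            // 4
    ModelSerializer.writeModel(net, new File("net.zip"), true);               // counter is saved with the configuration
    MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(new File("net.zip"), true);
    assert restored.getLayerWiseConfigurations().getEpochCount() == 4;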
From 8c43aa5c04409ec8c6c1693b3fc1aa5b74b61248 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Mon, 28 Aug 2017 20:15:02 +1000 Subject: [PATCH 2/9] Epoch count functionality + tests for Spark --- .../impl/graph/SparkComputationGraph.java | 4 ++ .../impl/multilayer/SparkDl4jMultiLayer.java | 2 + ...TestSparkMultiLayerParameterAveraging.java | 49 +++++++++++++++++++ 3 files changed, 55 insertions(+) diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java index 3f92eb5b9758..f4520d62841e 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java @@ -166,6 +166,7 @@ public ComputationGraph fit(JavaRDD rdd) { ((GridExecutioner) Nd4j.getExecutioner()).flushQueue(); trainingMaster.executeTraining(this, rdd); + network.incrementEpochCount(); return network; } @@ -207,6 +208,7 @@ public ComputationGraph fit(String path, int minPartitions) { */ public ComputationGraph fitPaths(JavaRDD paths) { trainingMaster.executeTrainingPaths(this, paths); + network.incrementEpochCount(); return network; } @@ -231,6 +233,7 @@ public ComputationGraph fitMultiDataSet(JavaRDD rdd) { ((GridExecutioner) Nd4j.getExecutioner()).flushQueue(); trainingMaster.executeTrainingMDS(this, rdd); + network.incrementEpochCount(); return network; } @@ -263,6 +266,7 @@ public ComputationGraph fitMultiDataSet(String path) { */ public ComputationGraph fitPathsMultiDataSet(JavaRDD paths) { trainingMaster.executeTrainingPathsMDS(this, paths); + network.incrementEpochCount(); return network; } diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java index 4ec36ce9328a..36e55bbbe737 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java @@ -216,6 +216,7 @@ public MultiLayerNetwork fit(JavaRDD trainingData) { ((GridExecutioner) Nd4j.getExecutioner()).flushQueue(); trainingMaster.executeTraining(this, trainingData); + network.incrementEpochCount(); return network; } @@ -257,6 +258,7 @@ public MultiLayerNetwork fit(String path, int minPartitions) { */ public MultiLayerNetwork fitPaths(JavaRDD paths) { trainingMaster.executeTrainingPaths(this, paths); + network.incrementEpochCount(); return network; } diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java index 35a7fa296277..f570618a11e4 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java @@ -46,6 +46,7 @@ import 
org.deeplearning4j.nn.conf.layers.RBM; import org.deeplearning4j.nn.conf.layers.variational.GaussianReconstructionDistribution; import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder; +import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.listeners.ScoreIterationListener; @@ -975,4 +976,52 @@ public void testROCMultiClass() { assertEquals(local.getRocCurve(i), sparkROC.getRocCurve(i)); } } + + + @Test + public void testEpochCounter() throws Exception { + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new OutputLayer.Builder().nIn(4).nOut(3).build()) + .build(); + + ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + .graphBuilder() + .addInputs("in") + .addLayer("out", new OutputLayer.Builder().nIn(4).nOut(3).build(), "in") + .setOutputs("out") + .build(); + + DataSetIterator iter = new IrisDataSetIterator(1, 150); + + List l = new ArrayList<>(); + while(iter.hasNext()){ + l.add(iter.next()); + } + + JavaRDD rdd = sc.parallelize(l); + + + int rddDataSetNumExamples = 1; + int averagingFrequency = 3; + ParameterAveragingTrainingMaster tm = new ParameterAveragingTrainingMaster.Builder(rddDataSetNumExamples) + .averagingFrequency(averagingFrequency).batchSizePerWorker(rddDataSetNumExamples) + .saveUpdater(true).workerPrefetchNumBatches(0).build(); + Nd4j.getRandom().setSeed(12345); + + + SparkDl4jMultiLayer sn1 = new SparkDl4jMultiLayer(sc, conf.clone(), tm); + SparkComputationGraph sn2 = new SparkComputationGraph(sc, conf2.clone(), tm); + + + for(int i=0; i<4; i++ ){ + assertEquals(i, sn1.getNetwork().getLayerWiseConfigurations().getEpochCount()); + assertEquals(i, sn2.getNetwork().getConfiguration().getEpochCount()); + sn1.fit(rdd); + sn2.fit(rdd); + assertEquals(i+1, sn1.getNetwork().getLayerWiseConfigurations().getEpochCount()); + assertEquals(i+1, sn2.getNetwork().getConfiguration().getEpochCount()); + } + } } From 00e1f6b5927b5925d1f4ff5257ad10b3d9062363 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Mon, 28 Aug 2017 21:00:58 +1000 Subject: [PATCH 3/9] Constraints API + implementations --- .../nn/api/ParamInitializer.java | 4 ++ .../nn/api/layers/LayerConstraint.java | 11 +++++ .../nn/conf/constraint/BaseConstraint.java | 45 +++++++++++++++++++ .../nn/conf/constraint/MaxNormConstraint.java | 33 ++++++++++++++ .../conf/constraint/MinMaxNormConstraint.java | 27 +++++++++++ .../constraint/NonNegativeConstraint.java | 37 +++++++++++++++ .../conf/constraint/UnitNormConstraint.java | 20 +++++++++ 7 files changed, 177 insertions(+) create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/NonNegativeConstraint.java create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java index 
7bb795e4c666..fb94207678cd 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java @@ -34,6 +34,10 @@ public interface ParamInitializer { int numParams(org.deeplearning4j.nn.conf.layers.Layer layer); + boolean isWeightParam(String key); + + boolean isBiasParam(String key); + /** * Initialize the parameters * diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java new file mode 100644 index 000000000000..1b1170bc2f63 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java @@ -0,0 +1,11 @@ +package org.deeplearning4j.nn.api.layers; + +import org.deeplearning4j.nn.api.Layer; +import org.nd4j.shade.jackson.annotation.JsonTypeInfo; + +@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") +public interface LayerConstraint { + + void applyConstraint(Layer layer, int iteration, int epoch); + +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java new file mode 100644 index 000000000000..c65628ec3108 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java @@ -0,0 +1,45 @@ +package org.deeplearning4j.nn.conf.constraint; + +import lombok.AllArgsConstructor; +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.api.layers.LayerConstraint; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Broadcast; +import org.nd4j.linalg.indexing.BooleanIndexing; +import org.nd4j.linalg.indexing.conditions.Conditions; + +import java.util.Map; + + +@AllArgsConstructor +public abstract class BaseConstraint implements LayerConstraint { + public static final double DEFAULT_EPSILON = 1e-6; + + protected boolean applyToWeights; + protected boolean applyToBiases; + protected double epsilon = 1e-6; + protected int[] dimensions; + + protected BaseConstraint(boolean applyToWeights, boolean applyToBiases, int... 
dimensions){ + this(applyToWeights, applyToBiases, DEFAULT_EPSILON, dimensions); + } + + + @Override + public void applyConstraint(Layer layer, int iteration, int epoch) { + Map paramTable = layer.paramTable(); + if(paramTable == null || paramTable.isEmpty() ){ + return; + } + + ParamInitializer i = layer.conf().getLayer().initializer(); + for(Map.Entry e : paramTable.entrySet()){ + if(applyToWeights && i.isWeightParam(e.getKey()) || applyToBiases && i.isBiasParam(e.getKey())){ + apply(e.getValue(), i.isBiasParam(e.getKey())); + } + } + } + + public abstract void apply(INDArray param, boolean isBias); +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java new file mode 100644 index 000000000000..f6411af877e9 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java @@ -0,0 +1,33 @@ +package org.deeplearning4j.nn.conf.constraint; + +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Broadcast; +import org.nd4j.linalg.indexing.BooleanIndexing; +import org.nd4j.linalg.indexing.conditions.Conditions; + +public class MaxNormConstraint extends BaseConstraint { + + private double maxNorm; + + public MaxNormConstraint(double maxNorm, int... dimensions) { + this(maxNorm, true, false, dimensions); + } + + public MaxNormConstraint(double maxNorm, boolean applyToWeights, boolean applyToBiases, int... dimensions){ + super(applyToWeights, applyToBiases, DEFAULT_EPSILON, dimensions); + this.maxNorm = maxNorm; + } + + + @Override + public void apply(INDArray param, boolean isBias){ + INDArray norm = param.norm2(dimensions); + INDArray clipped = norm.unsafeDuplication(); + BooleanIndexing.replaceWhere(clipped, maxNorm, Conditions.greaterThan(maxNorm)); + norm.addi(epsilon); + + clipped.divi(norm); + + Broadcast.mul(param, clipped, param, dimensions ); + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java new file mode 100644 index 000000000000..dd4a578385d4 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java @@ -0,0 +1,27 @@ +package org.deeplearning4j.nn.conf.constraint; + +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.indexing.BooleanIndexing; +import org.nd4j.linalg.indexing.conditions.Conditions; + +public class MinMaxNormConstraint extends BaseConstraint { + + private double min; + private double max; + + public MinMaxNormConstraint(double min, double max, int... dimensions){ + this(min, max, true, false, dimensions); + } + + public MinMaxNormConstraint(double min, double max, boolean applyToWeights, boolean applyToBiases, int... 
dimensions){ + super(applyToWeights, applyToBiases, dimensions); + this.min = min; + this.max = max; + } + + @Override + public void apply(INDArray param, boolean isBias) { + BooleanIndexing.replaceWhere(param, min, Conditions.lessThan(min)); + BooleanIndexing.replaceWhere(param, max, Conditions.lessThan(max)); + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/NonNegativeConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/NonNegativeConstraint.java new file mode 100644 index 000000000000..bd2dbbbf89a6 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/NonNegativeConstraint.java @@ -0,0 +1,37 @@ +package org.deeplearning4j.nn.conf.constraint; + +import lombok.AllArgsConstructor; +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.api.layers.LayerConstraint; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.indexing.BooleanIndexing; +import org.nd4j.linalg.indexing.conditions.Conditions; + +import java.util.Map; + +@AllArgsConstructor +public class NonNegativeConstraint implements LayerConstraint { + + protected boolean applyToWeights; + protected boolean applyToBiases; + + public NonNegativeConstraint(){ + this(true, false); + } + + @Override + public void applyConstraint(Layer layer, int iteration, int epoch) { + Map paramTable = layer.paramTable(); + if(paramTable == null || paramTable.isEmpty() ){ + return; + } + + ParamInitializer i = layer.conf().getLayer().initializer(); + for(Map.Entry e : paramTable.entrySet()){ + if(applyToWeights && i.isWeightParam(e.getKey()) || applyToBiases && i.isBiasParam(e.getKey())){ + BooleanIndexing.replaceWhere(e.getValue(), 0.0, Conditions.lessThan(0.0)); + } + } + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java new file mode 100644 index 000000000000..9d0d2a3bf978 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java @@ -0,0 +1,20 @@ +package org.deeplearning4j.nn.conf.constraint; + +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.api.layers.LayerConstraint; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Broadcast; + + +public class UnitNormConstraint extends BaseConstraint{ + + public UnitNormConstraint(int... 
dimensions){ + super(true, false, dimensions); + } + + @Override + public void apply(INDArray param, boolean isBias) { + INDArray norm2 = param.norm2(dimensions); + Broadcast.div(param, norm2, param, dimensions ); + } +} From f8c4040fff4a66e2ea1216958491a712c4d22616 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 30 Aug 2017 18:42:30 +1000 Subject: [PATCH 4/9] Constraints: configuration, plug into optimizers --- .../java/org/deeplearning4j/nn/api/Model.java | 6 ++++ .../nn/conf/NeuralNetConfiguration.java | 33 +++++++++++++++++++ .../deeplearning4j/nn/conf/layers/Layer.java | 29 ++++++++++++++++ .../nn/graph/ComputationGraph.java | 7 ++++ .../nn/layers/AbstractLayer.java | 11 +++++++ .../nn/multilayer/MultiLayerNetwork.java | 10 ++++++ .../optimize/solvers/BaseOptimizer.java | 17 ++++++++++ .../solvers/StochasticGradientDescent.java | 1 + 8 files changed, 114 insertions(+) diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Model.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Model.java index dd21cfba9e95..954bf2a72d29 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Model.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Model.java @@ -264,4 +264,10 @@ public interface Model { * Clear input */ void clear(); + + + /** + * Apply any constraints to the model + */ + void applyConstraints(int iteration, int epoch); } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java index 432170bc04b4..fbd4b8c367d8 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java @@ -25,6 +25,7 @@ import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.ClassUtils; import org.deeplearning4j.nn.api.OptimizationAlgorithm; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -120,6 +121,9 @@ public class NeuralNetConfiguration implements Serializable, Cloneable { // for Spark and model serialization protected int iterationCount = 0; + //Counter for the number of epochs completed so far. Used for per-epoch schedules + protected int epochCount = 0; + private static ObjectMapper mapper = initMapper(); private static final ObjectMapper mapperYaml = initMapperYaml(); private static Set> subtypesClassCache = null; @@ -631,6 +635,7 @@ public static class Builder implements Cloneable { protected double lrPolicySteps = Double.NaN; protected double lrPolicyPower = Double.NaN; protected boolean pretrain = false; + protected List constraints = null; protected WorkspaceMode trainingWorkspaceMode = WorkspaceMode.NONE; protected WorkspaceMode inferenceWorkspaceMode = WorkspaceMode.SEPARATE; @@ -1222,11 +1227,39 @@ public Builder lrPolicyPower(double lrPolicyPower) { return this; } + /** + * Sets the convolution mode for convolutional layers, which impacts padding and output sizes. + * See {@link ConvolutionMode} for details. Defaults to ConvolutionMode.TRUNCATE + * @param convolutionMode Convolution mode to use + */ public Builder convolutionMode(ConvolutionMode convolutionMode) { this.convolutionMode = convolutionMode; return this; } + /** + * Set constraints to be applied to all layers. 
Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, + * etc). These constraints are applied at each iteration, after the parameters have been updated. + * + * @param constraints Constraints to apply to all layers + */ + public Builder constraints(LayerConstraint... constraints){ + return constraints(Arrays.asList(constraints)); + } + + /** + * Set constraints to be applied to all layers. Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, + * etc). These constraints are applied at each iteration, after the parameters have been updated. + * + * @param constraints Constraints to apply to all layers + */ + public Builder constraints(List constraints){ + this.constraints = constraints; + return this; + } + private void learningRateValidation(String layerName) { if (learningRatePolicy != LearningRatePolicy.None && Double.isNaN(lrPolicyDecayRate)) { //LR policy, if used, should have a decay rate. 2 exceptions: Map for schedule, and Poly + power param diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java index 9cf3354815bf..d4feb695ab2c 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java @@ -21,6 +21,7 @@ import lombok.Data; import lombok.NoArgsConstructor; import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.Updater; @@ -37,7 +38,9 @@ import org.nd4j.shade.jackson.annotation.JsonTypeInfo.Id; import java.io.Serializable; +import java.util.Arrays; import java.util.Collection; +import java.util.List; /** * A neural network layer. @@ -70,11 +73,13 @@ public abstract class Layer implements Serializable, Cloneable { protected String layerName; protected double dropOut; + protected List constraints; public Layer(Builder builder) { this.layerName = builder.layerName; this.dropOut = builder.dropOut; + this.constraints = builder.constraints; } /** @@ -213,6 +218,7 @@ public IUpdater getIUpdaterByParam(String paramName) { public abstract static class Builder> { protected String layerName = null; protected double dropOut = Double.NaN; + protected List constraints = null; /** * Layer name assigns layer string name. @@ -250,6 +256,29 @@ public T dropOut(double inputRetainProbability) { return (T) this; } + /** + * Set constraints to be applied to this layer. Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, + * etc). These constraints are applied at each iteration, after the parameters have been updated. + * + * @param constraints Constraints to apply to this layer + */ + public T constraints(LayerConstraint... constraints) { + return constraints(Arrays.asList(constraints)); + } + + /** + * Set constraints to be applied to this layer. Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, + * etc). These constraints are applied at each iteration, after the parameters have been updated. + * + * @param constraints Constraints to apply to all layers + */ + public T constraints(List constraints) { + this.constraints = constraints; + return (T) this; + } + public abstract E build(); } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java index aaadf3d3a5b2..885d2c759b2f 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java @@ -2323,6 +2323,13 @@ public void clear() { labelMaskArrays = null; } + @Override + public void applyConstraints(int iteration, int epoch) { + for(Layer l : layers){ + l.applyConstraints(iteration, epoch); + } + } + //------------------------------------------------------------------------------ //RNN-specific functionality diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java index c288ba98697d..f4bd6a61bf4d 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java @@ -20,6 +20,7 @@ import lombok.Data; import lombok.NoArgsConstructor; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; @@ -485,4 +486,14 @@ public void accumulateScore(double accum) { throw new UnsupportedOperationException( "Not supported for this layer, or should be overridden for layers requiring it"); } + + + @Override + public void applyConstraints(int iteration, int epoch){ + if(layerConf().getConstraints() != null){ + for(LayerConstraint lc : layerConf().getConstraints()){ + lc.applyConstraint(this, iteration, epoch); + } + } + } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java index 7d4ac95d1bdd..e815574449a9 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java @@ -277,6 +277,9 @@ public void pretrainLayer(int layerIdx, DataSetIterator iter) { } } } + + int ec = getLayer(layerIdx).conf().getEpochCount() + 1; + getLayer(layerIdx).conf().setEpochCount(ec); } /** @@ -2273,6 +2276,13 @@ public void clear() { solver = null; } + @Override + public void applyConstraints(int iteration, int epoch) { + for(Layer l : layers){ + l.applyConstraints(iteration, epoch); + } + } + /** * Averages the given logistic regression * from a mini batch in to this one diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java index b703e95f0a93..d49269f1809f 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java @@ -260,6 +260,7 @@ public 
boolean optimize() { //check for termination conditions based on absolute change in score checkTerminalConditions(pair.getFirst().gradient(), oldScore, score, i); incrementIterationCount(model, 1); + applyConstraints(model); } return true; } @@ -379,4 +380,20 @@ public static void incrementIterationCount(Model model, int incrementBy) { } } + public static int getEpochCount(Model model){ + if (model instanceof MultiLayerNetwork) { + return ((MultiLayerNetwork) model).getLayerWiseConfigurations().getEpochCount(); + } else if (model instanceof ComputationGraph) { + return ((ComputationGraph) model).getConfiguration().getEpochCount(); + } else { + return model.conf().getEpochCount(); + } + } + + public static void applyConstraints(Model model){ + int iter = getIterationCount(model); + int epoch = getEpochCount(model); + model.applyConstraints(iter, epoch); + } + } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java index 11b7d6140687..0a08882cec6d 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java @@ -93,6 +93,7 @@ public boolean optimize() { checkTerminalConditions(pair.getFirst().gradient(), oldScore, score, i); BaseOptimizer.incrementIterationCount(model, 1); + applyConstraints(model); } return true; } From 5236fccc831ca8ab626684aac461093f2d77d343 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 30 Aug 2017 20:03:44 +1000 Subject: [PATCH 5/9] Fixes and test --- .../deeplearning4j/plot/BarnesHutTsne.java | 5 ++ .../nn/conf/constraints/TestConstraints.java | 87 +++++++++++++++++++ .../optimize/solver/TestOptimizers.java | 5 ++ .../nn/api/layers/LayerConstraint.java | 6 +- .../nn/conf/NeuralNetConfiguration.java | 2 +- .../nn/conf/constraint/BaseConstraint.java | 6 ++ .../nn/conf/constraint/MaxNormConstraint.java | 13 ++- .../conf/constraint/MinMaxNormConstraint.java | 10 +++ .../constraint/NonNegativeConstraint.java | 8 ++ .../conf/constraint/UnitNormConstraint.java | 17 +++- .../nn/conf/layers/LayerValidation.java | 14 ++- .../deeplearning4j/nn/layers/FrozenLayer.java | 5 ++ .../variational/VariationalAutoencoder.java | 10 +++ .../BatchNormalizationParamInitializer.java | 10 +++ .../params/ConvolutionParamInitializer.java | 10 +++ .../nn/params/DefaultParamInitializer.java | 10 +++ .../nn/params/EmptyParamInitializer.java | 10 +++ .../params/FrozenLayerParamInitializer.java | 10 +++ ...avesBidirectionalLSTMParamInitializer.java | 11 +++ .../nn/params/GravesLSTMParamInitializer.java | 10 +++ .../nn/params/LSTMParamInitializer.java | 10 +++ .../FineTuneConfiguration.java | 5 +- 22 files changed, 264 insertions(+), 10 deletions(-) create mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java diff --git a/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java b/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java index 8445e49580d9..ab65e9b05aae 100644 --- a/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java +++ b/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java @@ -299,6 +299,11 @@ public void setParam(String key, INDArray val) { @Override public void clear() {} + @Override + public void applyConstraints(int iteration, 
int epoch) { + //No op + } + /* compute the gradient given the current solution, the probabilities and the constant */ protected Pair gradient(INDArray p) { throw new UnsupportedOperationException(); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java new file mode 100644 index 000000000000..f4b25fc450e8 --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java @@ -0,0 +1,87 @@ +package org.deeplearning4j.nn.conf.constraints; + +import org.deeplearning4j.nn.api.layers.LayerConstraint; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.constraint.MaxNormConstraint; +import org.deeplearning4j.nn.conf.constraint.MinMaxNormConstraint; +import org.deeplearning4j.nn.conf.constraint.NonNegativeConstraint; +import org.deeplearning4j.nn.conf.constraint.UnitNormConstraint; +import org.deeplearning4j.nn.conf.distribution.NormalDistribution; +import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.nn.weights.WeightInit; +import org.junit.Test; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.lossfunctions.LossFunctions; + +import java.util.Collections; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestConstraints { + + @Test + public void testConstraints(){ + + + LayerConstraint[] constraints = new LayerConstraint[]{ + new MaxNormConstraint(0.5, 1), + new MinMaxNormConstraint(0.3, 0.4, 1), + new NonNegativeConstraint(), + new UnitNormConstraint(1) + }; + + for(LayerConstraint lc : constraints){ + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .constraints(lc) + .learningRate(0.0) + .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0,5)) + .list() + .layer(new DenseLayer.Builder().nIn(12).nOut(10).build()) + .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + List exp = Collections.singletonList(lc.clone()); + assertEquals(exp, net.getLayer(0).conf().getLayer().getConstraints()); + assertEquals(exp, net.getLayer(1).conf().getLayer().getConstraints()); + + INDArray input = Nd4j.rand(3, 12); + INDArray labels = Nd4j.rand(3, 8); + + net.fit(input, labels); + + INDArray w0 = net.getParam("0_W"); + INDArray b0 = net.getParam("0_b"); + INDArray w1 = net.getParam("1_W"); + INDArray b1 = net.getParam("1_b"); + + System.out.println(w0.norm2(1)); + + if(lc instanceof MaxNormConstraint){ + assertTrue(w0.norm2(1).maxNumber().doubleValue() <= 0.5 ); + assertTrue(w1.norm2(1).maxNumber().doubleValue() <= 0.5 ); + } else if(lc instanceof MinMaxNormConstraint){ + assertTrue(w0.minNumber().doubleValue() >= 0.3 ); + assertTrue(w1.maxNumber().doubleValue() <= 0.4 ); + } else if(lc instanceof NonNegativeConstraint ){ + assertTrue(w0.minNumber().doubleValue() >= 0.0 ); + } else if(lc instanceof UnitNormConstraint ){ + assertEquals(w0.norm2(1).minNumber().doubleValue(), 1.0, 1e-6 ); + assertEquals(w0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6 ); + 
assertEquals(w1.norm2(1).minNumber().doubleValue(), 1.0, 1e-6 ); + assertEquals(w1.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6 ); + } + } + + } + +} diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java index 0ecdae8343c8..6e297143eef1 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java @@ -1088,5 +1088,10 @@ public Pair feedForwardMaskArray(INDArray maskArray, MaskSt public INDArray getGradientsViewArray() { return gradientView; } + + @Override + public void applyConstraints(int iteration, int epoch) { + + } } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java index 1b1170bc2f63..c1decac44c14 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java @@ -3,9 +3,13 @@ import org.deeplearning4j.nn.api.Layer; import org.nd4j.shade.jackson.annotation.JsonTypeInfo; +import java.io.Serializable; + @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") -public interface LayerConstraint { +public interface LayerConstraint extends Cloneable, Serializable { void applyConstraint(Layer layer, int iteration, int epoch); + LayerConstraint clone(); + } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java index fbd4b8c367d8..0e31d156a594 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java @@ -1368,7 +1368,7 @@ private void configureLayer(Layer layer) { } } LayerValidation.generalValidation(layerName, layer, useDropConnect, dropOut, l2, l2Bias, - l1, l1Bias, dist); + l1, l1Bias, dist, constraints); } private void copyConfigToLayer(String layerName, Layer layer) { diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java index c65628ec3108..f9c3304606be 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java @@ -1,6 +1,8 @@ package org.deeplearning4j.nn.conf.constraint; import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -13,6 +15,8 @@ @AllArgsConstructor +@EqualsAndHashCode +@Data public abstract class BaseConstraint implements LayerConstraint { public static final double DEFAULT_EPSILON = 1e-6; @@ -42,4 +46,6 @@ public void applyConstraint(Layer layer, int iteration, int epoch) { } public abstract void apply(INDArray param, boolean isBias); + + public abstract BaseConstraint clone(); } diff --git 
a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java index f6411af877e9..257b3fce3c0a 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java @@ -1,10 +1,15 @@ package org.deeplearning4j.nn.conf.constraint; +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Broadcast; import org.nd4j.linalg.indexing.BooleanIndexing; import org.nd4j.linalg.indexing.conditions.Conditions; +@Data +@EqualsAndHashCode(callSuper = true) public class MaxNormConstraint extends BaseConstraint { private double maxNorm; @@ -25,9 +30,15 @@ public void apply(INDArray param, boolean isBias){ INDArray clipped = norm.unsafeDuplication(); BooleanIndexing.replaceWhere(clipped, maxNorm, Conditions.greaterThan(maxNorm)); norm.addi(epsilon); - clipped.divi(norm); + //Determine broadcast dimensions: + Broadcast.mul(param, clipped, param, dimensions ); } + + @Override + public MaxNormConstraint clone() { + return new MaxNormConstraint(maxNorm, applyToWeights, applyToBiases, dimensions); + } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java index dd4a578385d4..f9147add3fe9 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java @@ -1,9 +1,14 @@ package org.deeplearning4j.nn.conf.constraint; +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.BooleanIndexing; import org.nd4j.linalg.indexing.conditions.Conditions; +@Data +@EqualsAndHashCode(callSuper = true) public class MinMaxNormConstraint extends BaseConstraint { private double min; @@ -24,4 +29,9 @@ public void apply(INDArray param, boolean isBias) { BooleanIndexing.replaceWhere(param, min, Conditions.lessThan(min)); BooleanIndexing.replaceWhere(param, max, Conditions.lessThan(max)); } + + @Override + public MinMaxNormConstraint clone() { + return new MinMaxNormConstraint(min, max, applyToWeights, applyToBiases, dimensions); + } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/NonNegativeConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/NonNegativeConstraint.java index bd2dbbbf89a6..4f928d7d5aee 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/NonNegativeConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/NonNegativeConstraint.java @@ -1,6 +1,8 @@ package org.deeplearning4j.nn.conf.constraint; import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -11,6 +13,7 @@ import java.util.Map; @AllArgsConstructor +@Data public class NonNegativeConstraint implements LayerConstraint { protected boolean 
applyToWeights; @@ -34,4 +37,9 @@ public void applyConstraint(Layer layer, int iteration, int epoch) { } } } + + @Override + public LayerConstraint clone() { + return new NonNegativeConstraint(applyToWeights, applyToBiases); + } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java index 9d0d2a3bf978..6fcfbcbe3530 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java @@ -1,15 +1,21 @@ package org.deeplearning4j.nn.conf.constraint; -import org.deeplearning4j.nn.api.Layer; +import lombok.Data; +import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Broadcast; - +@Data +@EqualsAndHashCode(callSuper = true) public class UnitNormConstraint extends BaseConstraint{ public UnitNormConstraint(int... dimensions){ - super(true, false, dimensions); + this(true, false, dimensions); + } + + public UnitNormConstraint(boolean applyToWeights, boolean applyToBiases, int... dimensions){ + super(applyToWeights, applyToBiases, dimensions); } @Override @@ -17,4 +23,9 @@ public void apply(INDArray param, boolean isBias) { INDArray norm2 = param.norm2(dimensions); Broadcast.div(param, norm2, param, dimensions ); } + + @Override + public UnitNormConstraint clone() { + return new UnitNormConstraint(applyToWeights, applyToBiases, dimensions); + } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java index 37f812a929d7..3f886e7f8f67 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java @@ -1,6 +1,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; @@ -10,6 +11,7 @@ import org.nd4j.linalg.learning.config.*; import java.util.HashMap; +import java.util.List; import java.util.Map; /** @@ -290,15 +292,15 @@ else if (momentumSchedule == null && layer.getMomentumSchedule() == null) public static void generalValidation(String layerName, Layer layer, boolean useDropConnect, Double dropOut, Double l2, Double l2Bias, Double l1, Double l1Bias, - Distribution dist) { + Distribution dist, List constraints) { generalValidation(layerName, layer, useDropConnect, dropOut == null ? 0.0 : dropOut, l2 == null ? Double.NaN : l2, l2Bias == null ? Double.NaN : l2Bias, - l1 == null ? Double.NaN : l1, l1Bias == null ? Double.NaN : l1Bias, dist); + l1 == null ? Double.NaN : l1, l1Bias == null ? 
Double.NaN : l1Bias, dist, constraints); } public static void generalValidation(String layerName, Layer layer, boolean useDropConnect, double dropOut, double l2, double l2Bias, double l1, double l1Bias, - Distribution dist) { + Distribution dist, List constraints) { if (layer != null) { @@ -318,6 +320,12 @@ public static void generalValidation(String layerName, Layer layer, boolean useD configureBaseLayer(layerName, bLayer, useDropConnect, dropOut, l2, l2Bias, l1, l1Bias, dist); } + + if(constraints != null){ + if(layer.getConstraints() == null){ + layer.setConstraints(constraints); + } + } } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java index 4d6651fc3be1..0dd97c73b9b9 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java @@ -381,6 +381,11 @@ public void clear() { insideLayer.clear(); } + @Override + public void applyConstraints(int iteration, int epoch) { + //No-op + } + /** * Init the model */ diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java index 01c29b8e76e1..08350bebd177 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java @@ -3,6 +3,7 @@ import lombok.AllArgsConstructor; import lombok.Data; import lombok.Getter; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; @@ -582,6 +583,15 @@ public void clear() { this.maskArray = null; } + @Override + public void applyConstraints(int iteration, int epoch) { + if(layerConf().getConstraints() != null){ + for(LayerConstraint lc : layerConf().getConstraints()){ + lc.applyConstraint(this, iteration, epoch); + } + } + } + public boolean isPretrainParam(String param) { return !(param.startsWith("e") || param.startsWith(VariationalAutoencoderParamInitializer.PZX_MEAN_PREFIX)); } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/BatchNormalizationParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/BatchNormalizationParamInitializer.java index bf0f9c8733a7..e13cc8de76b8 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/BatchNormalizationParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/BatchNormalizationParamInitializer.java @@ -51,6 +51,16 @@ public int numParams(Layer l) { } } + @Override + public boolean isWeightParam(String key) { + return false; + } + + @Override + public boolean isBiasParam(String key) { + return false; + } + @Override public Map init(NeuralNetConfiguration conf, INDArray paramView, boolean initializeParams) { Map params = Collections.synchronizedMap(new LinkedHashMap()); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java index f70697a8c181..c4be30a2512c 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java 
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java @@ -66,6 +66,16 @@ public int numParams(Layer l) { return nIn * nOut * kernel[0] * kernel[1] + (layerConf.hasBias() ? nOut : 0); } + @Override + public boolean isWeightParam(String key) { + return WEIGHT_KEY.equals(key); + } + + @Override + public boolean isBiasParam(String key) { + return BIAS_KEY.equals(key); + } + @Override public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { ConvolutionLayer layer = (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer(); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java index 644d971ed243..7670e61f13da 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java @@ -61,6 +61,16 @@ public int numParams(Layer l) { return nIn * nOut + (hasBias(l) ? nOut : 0); //weights + bias } + @Override + public boolean isWeightParam(String key) { + return WEIGHT_KEY.equals(key); + } + + @Override + public boolean isBiasParam(String key) { + return BIAS_KEY.equals(key); + } + @Override public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer)) diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/EmptyParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/EmptyParamInitializer.java index 937ccbb58a26..9725477edf35 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/EmptyParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/EmptyParamInitializer.java @@ -29,6 +29,16 @@ public int numParams(Layer layer) { return 0; } + @Override + public boolean isWeightParam(String key) { + return false; + } + + @Override + public boolean isBiasParam(String key) { + return false; + } + @Override public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { return Collections.EMPTY_MAP; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerParamInitializer.java index 058d1343faeb..e2e24a3abf42 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerParamInitializer.java @@ -33,6 +33,16 @@ public int numParams(Layer layer) { return initializer.numParams(fl.getLayer()); } + @Override + public boolean isWeightParam(String key) { + return false; + } + + @Override + public boolean isBiasParam(String key) { + return false; + } + @Override public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { FrozenLayer fl = (FrozenLayer) conf.getLayer(); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java index 49ac80cb9ca5..1185389dc659 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java +++ 
b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java @@ -75,6 +75,17 @@ public int numParams(Layer l) { return 2 * nParamsForward; } + @Override + public boolean isWeightParam(String key) { + return RECURRENT_WEIGHT_KEY_FORWARDS.equals(key) || INPUT_WEIGHT_KEY_FORWARDS.equals(key) + || RECURRENT_WEIGHT_KEY_BACKWARDS.equals(key) || INPUT_WEIGHT_KEY_BACKWARDS.equals(key); + } + + @Override + public boolean isBiasParam(String key) { + return BIAS_KEY_FORWARDS.equals(key) || BIAS_KEY_BACKWARDS.equals(key); + } + @Override public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { Map params = Collections.synchronizedMap(new LinkedHashMap()); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java index 98f9ce07661b..b27497d897a5 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java @@ -69,6 +69,16 @@ public int numParams(Layer l) { return nParams; } + @Override + public boolean isWeightParam(String key) { + return RECURRENT_WEIGHT_KEY.equals(key) || INPUT_WEIGHT_KEY.equals(key); + } + + @Override + public boolean isBiasParam(String key) { + return BIAS_KEY.equals(key); + } + @Override public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { Map params = Collections.synchronizedMap(new LinkedHashMap()); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java index e943a75cc7d2..ef20acacf160 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java @@ -70,6 +70,16 @@ public int numParams(Layer l) { return nParams; } + @Override + public boolean isWeightParam(String key) { + return RECURRENT_WEIGHT_KEY.equals(key) || INPUT_WEIGHT_KEY.equals(key); + } + + @Override + public boolean isBiasParam(String key) { + return BIAS_KEY.equals(key); + } + @Override public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { Map params = Collections.synchronizedMap(new LinkedHashMap()); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java index dc87d9fdc202..28bdca0d6f5d 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java @@ -5,6 +5,7 @@ import lombok.Data; import lombok.NoArgsConstructor; import org.deeplearning4j.nn.api.OptimizationAlgorithm; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.layers.*; @@ -18,6 +19,7 @@ import org.nd4j.shade.jackson.core.JsonProcessingException; import java.io.IOException; +import java.util.List; import java.util.Map; /** @@ -76,6 +78,7 @@ public class FineTuneConfiguration { protected Double lrPolicySteps; 
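
The constraints list added to FineTuneConfiguration below is threaded through to LayerValidation.generalValidation (shown earlier in this series), which treats it purely as a default. A short sketch of that precedence rule, assuming the getConstraints()/setConstraints(...) accessors on the configuration-layer class that this PR introduces:

    import java.util.List;
    import org.deeplearning4j.nn.api.layers.LayerConstraint;
    import org.deeplearning4j.nn.conf.layers.Layer;

    // Sketch: fine-tune / builder-level constraints are only a default; per-layer settings win.
    static void applyDefaultConstraints(Layer layer, List<LayerConstraint> defaults) {
        if (defaults != null && layer.getConstraints() == null) {
            layer.setConstraints(defaults);   // layer declared nothing -> inherit the defaults
        }
        // A layer that already declares its own constraints keeps them unchanged.
    }

So, for example, a MaxNormConstraint set at the fine-tune level would not override constraints that a layer configures explicitly.
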
protected Double lrPolicyPower; protected ConvolutionMode convolutionMode; + protected List constraints; protected Boolean pretrain; protected Boolean backprop; @@ -284,7 +287,7 @@ public void applyToNeuralNetConfiguration(NeuralNetConfiguration nnc) { adamMeanDecay, adamVarDecay, rho, rmsDecay, epsilon); boolean useDropCon = (useDropConnect == null ? nnc.isUseDropConnect() : useDropConnect); - LayerValidation.generalValidation(l.getLayerName(), l, useDropCon, dropOut, l2, l2Bias, l1, l1Bias, dist); + LayerValidation.generalValidation(l.getLayerName(), l, useDropCon, dropOut, l2, l2Bias, l1, l1Bias, dist, constraints); } //Also: update the LR, L1 and L2 maps, based on current config (which might be different to original config) From af3bd1342b1d34586c83632b13032eb9b2d8c8c9 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 30 Aug 2017 20:20:33 +1000 Subject: [PATCH 6/9] Fix constraint implementations --- .../nn/conf/constraints/TestConstraints.java | 8 +++--- .../nn/conf/constraint/BaseConstraint.java | 12 ++++++++ .../nn/conf/constraint/MaxNormConstraint.java | 4 +-- .../conf/constraint/MinMaxNormConstraint.java | 28 +++++++++++++++---- .../conf/constraint/UnitNormConstraint.java | 2 +- 5 files changed, 41 insertions(+), 13 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java index f4b25fc450e8..52c87f154f86 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java @@ -64,14 +64,14 @@ public void testConstraints(){ INDArray w1 = net.getParam("1_W"); INDArray b1 = net.getParam("1_b"); - System.out.println(w0.norm2(1)); - if(lc instanceof MaxNormConstraint){ assertTrue(w0.norm2(1).maxNumber().doubleValue() <= 0.5 ); assertTrue(w1.norm2(1).maxNumber().doubleValue() <= 0.5 ); } else if(lc instanceof MinMaxNormConstraint){ - assertTrue(w0.minNumber().doubleValue() >= 0.3 ); - assertTrue(w1.maxNumber().doubleValue() <= 0.4 ); + assertTrue(w0.norm2(1).minNumber().doubleValue() >= 0.3 ); + assertTrue(w0.norm2(1).maxNumber().doubleValue() <= 0.4 ); + assertTrue(w1.norm2(1).minNumber().doubleValue() >= 0.3 ); + assertTrue(w1.norm2(1).maxNumber().doubleValue() <= 0.4 ); } else if(lc instanceof NonNegativeConstraint ){ assertTrue(w0.minNumber().doubleValue() >= 0.0 ); } else if(lc instanceof UnitNormConstraint ){ diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java index f9c3304606be..ab9043bbeab4 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java @@ -3,6 +3,7 @@ import lombok.AllArgsConstructor; import lombok.Data; import lombok.EqualsAndHashCode; +import org.apache.commons.lang3.ArrayUtils; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -48,4 +49,15 @@ public void applyConstraint(Layer layer, int iteration, int epoch) { public abstract void apply(INDArray param, boolean isBias); public abstract BaseConstraint clone(); + + public static int[] getBroadcastDims(int[] reduceDimensions, int rank){ + int[] out 
= new int[rank-reduceDimensions.length]; + int outPos = 0; + for( int i=0; i 1.0){ + throw new IllegalStateException("Invalid rate: must be in interval (0,1]: got " + rate); + } this.min = min; this.max = max; + this.rate = rate; } @Override public void apply(INDArray param, boolean isBias) { - BooleanIndexing.replaceWhere(param, min, Conditions.lessThan(min)); - BooleanIndexing.replaceWhere(param, max, Conditions.lessThan(max)); + INDArray norm = param.norm2(dimensions); + INDArray clipped = norm.unsafeDuplication(); + BooleanIndexing.replaceWhere(clipped, max, Conditions.greaterThan(max)); + BooleanIndexing.replaceWhere(clipped, min, Conditions.lessThan(min)); + + norm.addi(epsilon); + clipped.divi(norm); + + if(rate != 1.0){ + clipped.muli(rate).addi(norm.muli(1.0-rate)); + } + + Broadcast.mul(param, clipped, param, getBroadcastDims(dimensions, param.rank()) ); } @Override public MinMaxNormConstraint clone() { - return new MinMaxNormConstraint(min, max, applyToWeights, applyToBiases, dimensions); + return new MinMaxNormConstraint(min, max, rate, applyToWeights, applyToBiases, dimensions); } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java index 6fcfbcbe3530..7f2542e44f16 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java @@ -21,7 +21,7 @@ public UnitNormConstraint(boolean applyToWeights, boolean applyToBiases, int... @Override public void apply(INDArray param, boolean isBias) { INDArray norm2 = param.norm2(dimensions); - Broadcast.div(param, norm2, param, dimensions ); + Broadcast.div(param, norm2, param, getBroadcastDims(dimensions, param.rank()) ); } @Override From 07915f722b9f53e2323ff38fdf66bb863ed67445 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 30 Aug 2017 20:33:26 +1000 Subject: [PATCH 7/9] Javadoc --- .../nn/conf/constraint/BaseConstraint.java | 3 -- .../nn/conf/constraint/MaxNormConstraint.java | 24 ++++++++++++++- .../conf/constraint/MinMaxNormConstraint.java | 30 ++++++++++++++++++- .../constraint/NonNegativeConstraint.java | 6 +++- .../conf/constraint/UnitNormConstraint.java | 22 +++++++++++++- 5 files changed, 78 insertions(+), 7 deletions(-) diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java index ab9043bbeab4..664e9789ce6d 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java @@ -8,9 +8,6 @@ import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.factory.Broadcast; -import org.nd4j.linalg.indexing.BooleanIndexing; -import org.nd4j.linalg.indexing.conditions.Conditions; import java.util.Map; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java index 523137832e92..4c44156f35f2 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java +++ 
b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java @@ -2,22 +2,44 @@ import lombok.Data; import lombok.EqualsAndHashCode; -import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Broadcast; import org.nd4j.linalg.indexing.BooleanIndexing; import org.nd4j.linalg.indexing.conditions.Conditions; +/** + * Constrain the maximum L2 norm of the incoming weights for each unit to be less than or equal to the specified value. + * If the L2 norm exceeds the specified value, the weights will be scaled down to satisfy the constraint. + * + * @author Alex Black + */ @Data @EqualsAndHashCode(callSuper = true) public class MaxNormConstraint extends BaseConstraint { private double maxNorm; + /** + * Apply to weights but not biases by default + * + * @param maxNorm Maximum L2 value + * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should + * be dimension 1. For CNNs, this should be dimensions [1,2,3] correspending to last 3 of + * parameters which have order [depthOut, depthIn, kH, kW] + */ public MaxNormConstraint(double maxNorm, int... dimensions) { this(maxNorm, true, false, dimensions); } + /** + * + * @param maxNorm Maximum L2 value + * @param applyToWeights If constraint should be applied to weights + * @param applyToBiases If constraint should be applied to biases + * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should + * be dimension 1. For CNNs, this should be dimensions [1,2,3] correspending to last 3 of + * parameters which have order [depthOut, depthIn, kH, kW] + */ public MaxNormConstraint(double maxNorm, boolean applyToWeights, boolean applyToBiases, int... dimensions){ super(applyToWeights, applyToBiases, DEFAULT_EPSILON, dimensions); this.maxNorm = maxNorm; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java index a7d4b1ce30ce..db4f26640038 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java @@ -2,12 +2,21 @@ import lombok.Data; import lombok.EqualsAndHashCode; -import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Broadcast; import org.nd4j.linalg.indexing.BooleanIndexing; import org.nd4j.linalg.indexing.conditions.Conditions; +/** + * Constrain the minimum AND maximum L2 norm of the incoming weights for each unit to be between the specified values. + * If the L2 norm exceeds the specified max value, the weights will be scaled down to satisfy the constraint; if the + * L2 norm is less than the specified min value, the weights will be scaled up
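
As a quick illustration of the behaviour described in the javadoc above and checked by the TestConstraints assertions earlier in this series (a toy snippet, not part of the patch; it assumes the MinMaxNormConstraint(min, max, dimensions) constructor and the public apply(INDArray) signature that the final patch in this series settles on):

    import org.deeplearning4j.nn.conf.constraint.MinMaxNormConstraint;
    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;

    public class MinMaxNormDemo {
        public static void main(String[] args) {
            // Toy weight matrix whose L2 norms along dimension 1 start well above the max of 0.4
            INDArray w = Nd4j.rand(10, 5).muli(4.0);
            new MinMaxNormConstraint(0.3, 0.4, 1).apply(w);
            // Every norm2 along dimension 1 should now lie in [0.3, 0.4]
            System.out.println(w.norm2(1));
        }
    }
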
+ * Note that this constraint supports a rate parameter (default: 1.0, which is equivalent to a strict constraint). + * If rate < 1.0, the applied norm2 constraint will be (1-rate)*norm2 + rate*clippedNorm2, where clippedNorm2 is the + * norm2 value after applying clipping to min/max values. + * + * @author Alex Black + */ @Data @EqualsAndHashCode(callSuper = true) public class MinMaxNormConstraint extends BaseConstraint { @@ -17,10 +26,29 @@ public class MinMaxNormConstraint extends BaseConstraint { private double max; private double rate; + /** + * Apply to weights but not biases by default + * + * @param max Maximum L2 value + * @param min Minimum L2 value + * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should + * be dimension 1. For CNNs, this should be dimensions [1,2,3] correspending to last 3 of + * parameters which have order [depthOut, depthIn, kH, kW] + */ public MinMaxNormConstraint(double min, double max, int... dimensions){ this(min, max, DEFAULT_RATE, true, false, dimensions); } + /** + * + * @param max Maximum L2 value + * @param min Minimum L2 value + * @param applyToWeights If constraint should be applied to weights + * @param applyToBiases If constraint should be applied to biases + * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should + * be dimension 1. For CNNs, this should be dimensions [1,2,3] correspending to last 3 of + * parameters which have order [depthOut, depthIn, kH, kW] + */ public MinMaxNormConstraint(double min, double max, double rate, boolean applyToWeights, boolean applyToBiases, int... dimensions){ super(applyToWeights, applyToBiases, dimensions); if(rate <= 0 || rate > 1.0){ diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/NonNegativeConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/NonNegativeConstraint.java index 4f928d7d5aee..21866ac85710 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/NonNegativeConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/NonNegativeConstraint.java @@ -2,7 +2,6 @@ import lombok.AllArgsConstructor; import lombok.Data; -import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -12,6 +11,11 @@ import java.util.Map; +/** + * Constrain the weights to be non-negative + * + * @author Alex Black + */ @AllArgsConstructor @Data public class NonNegativeConstraint implements LayerConstraint { diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java index 7f2542e44f16..21b7b7dfc731 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java @@ -2,18 +2,38 @@ import lombok.Data; import lombok.EqualsAndHashCode; -import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Broadcast; +/** + * Constrain the L2 norm of the incoming weights for each unit to be 1.0 + * + * @author Alex Black + */ @Data @EqualsAndHashCode(callSuper = true) public class UnitNormConstraint extends BaseConstraint{ + /** + 
* Apply to weights but not biases by default + * + * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should + * be dimension 1. For CNNs, this should be dimensions [1,2,3] correspending to last 3 of + * parameters which have order [depthOut, depthIn, kH, kW] + */ public UnitNormConstraint(int... dimensions){ this(true, false, dimensions); } + /** + * Apply to weights but not biases by default + * + * @param applyToWeights If constraint should be applied to weights + * @param applyToBiases If constraint should be applied to biases (usually false) + * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should + * be dimension 1. For CNNs, this should be dimensions [1,2,3] correspending to last 3 of + * parameters which have order [depthOut, depthIn, kH, kW] + */ public UnitNormConstraint(boolean applyToWeights, boolean applyToBiases, int... dimensions){ super(applyToWeights, applyToBiases, dimensions); } From 94972d4b69ba3c75196b52ff3a9f5c318d1d6c8f Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 30 Aug 2017 20:39:45 +1000 Subject: [PATCH 8/9] Test + fix JSON ser/de for constraints --- .../nn/conf/constraints/TestConstraints.java | 17 ++++++++++++++++- .../nn/conf/constraint/BaseConstraint.java | 4 ++++ .../nn/conf/constraint/MaxNormConstraint.java | 4 ++++ .../conf/constraint/MinMaxNormConstraint.java | 4 ++++ .../nn/conf/constraint/UnitNormConstraint.java | 4 ++++ 5 files changed, 32 insertions(+), 1 deletion(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java index 52c87f154f86..d9215556cc8b 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java @@ -10,13 +10,17 @@ import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.util.ModelSerializer; import org.junit.Test; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.LossFunctions; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.util.Collections; import java.util.List; @@ -26,7 +30,7 @@ public class TestConstraints { @Test - public void testConstraints(){ + public void testConstraints() throws Exception { LayerConstraint[] constraints = new LayerConstraint[]{ @@ -80,6 +84,17 @@ public void testConstraints(){ assertEquals(w1.norm2(1).minNumber().doubleValue(), 1.0, 1e-6 ); assertEquals(w1.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6 ); } + + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ModelSerializer.writeModel(net, baos, true); + byte[] bytes = baos.toByteArray(); + + ByteArrayInputStream bais = new ByteArrayInputStream(bytes); + MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); + + assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations()); + assertEquals(net.params(), restored.params()); } } diff --git 
a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java index 664e9789ce6d..9653bd3cb364 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java @@ -23,6 +23,10 @@ public abstract class BaseConstraint implements LayerConstraint { protected double epsilon = 1e-6; protected int[] dimensions; + protected BaseConstraint(){ + //No arg for json ser/de + } + protected BaseConstraint(boolean applyToWeights, boolean applyToBiases, int... dimensions){ this(applyToWeights, applyToBiases, DEFAULT_EPSILON, dimensions); } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java index 4c44156f35f2..c1fa4e501228 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java @@ -19,6 +19,10 @@ public class MaxNormConstraint extends BaseConstraint { private double maxNorm; + private MaxNormConstraint(){ + //No arg for json ser/de + } + /** * Apply to weights but not biases by default * diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java index db4f26640038..9348cea04cc9 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java @@ -26,6 +26,10 @@ public class MinMaxNormConstraint extends BaseConstraint { private double max; private double rate; + private MinMaxNormConstraint(){ + //No arg for json ser/de + } + /** * Apply to weights but not biases by default * diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java index 21b7b7dfc731..51708b440ebc 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java @@ -14,6 +14,10 @@ @EqualsAndHashCode(callSuper = true) public class UnitNormConstraint extends BaseConstraint{ + private UnitNormConstraint(){ + //No arg for json ser/de + } + /** * Apply to weights but not biases by default * From 6715b015679c5250ee22bfd24978bf1aece8f6c6 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Thu, 31 Aug 2017 10:16:45 +1000 Subject: [PATCH 9/9] Typos and unnecessary arg removal --- .../deeplearning4j/nn/conf/constraint/BaseConstraint.java | 4 ++-- .../nn/conf/constraint/MaxNormConstraint.java | 6 +++--- .../nn/conf/constraint/MinMaxNormConstraint.java | 6 +++--- .../nn/conf/constraint/UnitNormConstraint.java | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java index 9653bd3cb364..fc4576502f06 100644 --- 
a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java @@ -42,12 +42,12 @@ public void applyConstraint(Layer layer, int iteration, int epoch) { ParamInitializer i = layer.conf().getLayer().initializer(); for(Map.Entry e : paramTable.entrySet()){ if(applyToWeights && i.isWeightParam(e.getKey()) || applyToBiases && i.isBiasParam(e.getKey())){ - apply(e.getValue(), i.isBiasParam(e.getKey())); + apply(e.getValue()); } } } - public abstract void apply(INDArray param, boolean isBias); + public abstract void apply(INDArray param); public abstract BaseConstraint clone(); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java index c1fa4e501228..164b0e9d30f8 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java @@ -28,7 +28,7 @@ private MaxNormConstraint(){ * * @param maxNorm Maximum L2 value * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should - * be dimension 1. For CNNs, this should be dimensions [1,2,3] correspending to last 3 of + * be dimension 1. For CNNs, this should be dimensions [1,2,3] corresponding to last 3 of * parameters which have order [depthOut, depthIn, kH, kW] */ public MaxNormConstraint(double maxNorm, int... dimensions) { @@ -41,7 +41,7 @@ public MaxNormConstraint(double maxNorm, int... dimensions) { * @param applyToWeights If constraint should be applied to weights * @param applyToBiases If constraint should be applied to biases * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should - * be dimension 1. For CNNs, this should be dimensions [1,2,3] correspending to last 3 of + * be dimension 1. For CNNs, this should be dimensions [1,2,3] corresponding to last 3 of * parameters which have order [depthOut, depthIn, kH, kW] */ public MaxNormConstraint(double maxNorm, boolean applyToWeights, boolean applyToBiases, int... dimensions){ @@ -51,7 +51,7 @@ public MaxNormConstraint(double maxNorm, boolean applyToWeights, boolean applyTo @Override - public void apply(INDArray param, boolean isBias){ + public void apply(INDArray param){ INDArray norm = param.norm2(dimensions); INDArray clipped = norm.unsafeDuplication(); BooleanIndexing.replaceWhere(clipped, maxNorm, Conditions.greaterThan(maxNorm)); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java index 9348cea04cc9..2705e73d806a 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java @@ -36,7 +36,7 @@ private MinMaxNormConstraint(){ * @param max Maximum L2 value * @param min Minimum L2 value * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should - * be dimension 1. For CNNs, this should be dimensions [1,2,3] correspending to last 3 of + * be dimension 1. 
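
The private no-arg constructors added in patch 8 exist purely for JSON ser/de: Jackson needs a way to instantiate a constraint before it can populate its fields. A minimal sketch of the idea (DL4J's actual configuration mapper is set up differently and handles LayerConstraint polymorphically as part of the full configuration JSON, so this is only illustrative):

    import org.deeplearning4j.nn.conf.constraint.MaxNormConstraint;
    import org.nd4j.shade.jackson.databind.ObjectMapper;

    public class ConstraintJsonDemo {
        public static void main(String[] args) throws Exception {
            ObjectMapper mapper = new ObjectMapper();
            String json = mapper.writeValueAsString(new MaxNormConstraint(0.5, 1));
            // Deserialization has to create the instance before setting its fields;
            // that is what the (private) no-arg constructor provides.
            MaxNormConstraint restored = mapper.readValue(json, MaxNormConstraint.class);
            System.out.println(restored);
        }
    }
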
For CNNs, this should be dimensions [1,2,3] corresponding to last 3 of * parameters which have order [depthOut, depthIn, kH, kW] */ public MinMaxNormConstraint(double min, double max, int... dimensions){ @@ -50,7 +50,7 @@ public MinMaxNormConstraint(double min, double max, int... dimensions){ * @param applyToWeights If constraint should be applied to weights * @param applyToBiases If constraint should be applied to biases * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should - * be dimension 1. For CNNs, this should be dimensions [1,2,3] correspending to last 3 of + * be dimension 1. For CNNs, this should be dimensions [1,2,3] corresponding to last 3 of * parameters which have order [depthOut, depthIn, kH, kW] */ public MinMaxNormConstraint(double min, double max, double rate, boolean applyToWeights, boolean applyToBiases, int... dimensions){ @@ -64,7 +64,7 @@ public MinMaxNormConstraint(double min, double max, double rate, boolean applyTo } @Override - public void apply(INDArray param, boolean isBias) { + public void apply(INDArray param) { INDArray norm = param.norm2(dimensions); INDArray clipped = norm.unsafeDuplication(); BooleanIndexing.replaceWhere(clipped, max, Conditions.greaterThan(max)); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java index 51708b440ebc..61193c944f13 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java @@ -22,7 +22,7 @@ private UnitNormConstraint(){ * Apply to weights but not biases by default * * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should - * be dimension 1. For CNNs, this should be dimensions [1,2,3] correspending to last 3 of + * be dimension 1. For CNNs, this should be dimensions [1,2,3] corresponding to last 3 of * parameters which have order [depthOut, depthIn, kH, kW] */ public UnitNormConstraint(int... dimensions){ @@ -35,7 +35,7 @@ public UnitNormConstraint(int... dimensions){ * @param applyToWeights If constraint should be applied to weights * @param applyToBiases If constraint should be applied to biases (usually false) * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should - * be dimension 1. For CNNs, this should be dimensions [1,2,3] correspending to last 3 of + * be dimension 1. For CNNs, this should be dimensions [1,2,3] corresponding to last 3 of * parameters which have order [depthOut, depthIn, kH, kW] */ public UnitNormConstraint(boolean applyToWeights, boolean applyToBiases, int... dimensions){ @@ -43,7 +43,7 @@ public UnitNormConstraint(boolean applyToWeights, boolean applyToBiases, int... } @Override - public void apply(INDArray param, boolean isBias) { + public void apply(INDArray param) { INDArray norm2 = param.norm2(dimensions); Broadcast.div(param, norm2, param, getBroadcastDims(dimensions, param.rank()) ); }
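
A closing note on the Broadcast.div call above (and the matching Broadcast.mul in the max-norm and min-max-norm constraints): param.norm2(dimensions) removes the reduced dimensions, so the broadcast op must be told which dimensions of the original parameter the norms map onto. getBroadcastDims returns exactly that complement. A sketch consistent with its signature and how it is used, though not necessarily the exact original implementation:

    import org.apache.commons.lang3.ArrayUtils;

    // Complement of the reduce dimensions: the dims of the original parameter that survive
    // the norm2 reduction, i.e. the dims to broadcast the norms back along.
    public static int[] getBroadcastDims(int[] reduceDimensions, int rank) {
        int[] out = new int[rank - reduceDimensions.length];
        int outPos = 0;
        for (int i = 0; i < rank; i++) {
            if (!ArrayUtils.contains(reduceDimensions, i)) {
                out[outPos++] = i;
            }
        }
        return out;
    }

    // Example: a 2d weight with norms taken along dimension 1 broadcasts along dimension 0,
    // i.e. getBroadcastDims(new int[]{1}, 2) yields {0}.
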