Merge pull request #5018 from deeplearning4j/ab_miscfixes
Various fixes and features
AlexDBlack committed May 1, 2018
2 parents 355747d + bb7bc6b commit 4013ccc
Showing 15 changed files with 317 additions and 438 deletions.
@@ -3,6 +3,8 @@
import org.deeplearning4j.api.storage.StatsStorageRouter;
import org.deeplearning4j.optimize.api.TrainingListener;

import java.io.Serializable;

/**
* An extension of the {@link TrainingListener} interface for those listeners that pass data off to a
* {@link org.deeplearning4j.api.storage.StatsStorageRouter} instance.
@@ -13,7 +15,7 @@
*
* @author Alex Black
*/
public interface RoutingIterationListener extends TrainingListener, Cloneable {
public interface RoutingIterationListener extends TrainingListener, Cloneable, Serializable {

void setStorageRouter(StatsStorageRouter router);

@@ -1340,10 +1340,20 @@ public void testLayerSize(){
assertEquals(30, net.layerSize(2));
assertEquals(13, net.layerSize(3));

assertEquals(3, net.layerInputSize(0));
assertEquals(0, net.layerInputSize(1));
assertEquals(((FeedForwardLayer)net.getLayer(2).conf().getLayer()).getNIn(), net.layerInputSize(2));
assertEquals(30, net.layerInputSize(3));

assertEquals(6, net.layerSize("0"));
assertEquals(0, net.layerSize("1"));
assertEquals(30, net.layerSize("2"));
assertEquals(13, net.layerSize("3"));

assertEquals(3, net.layerInputSize("0"));
assertEquals(0, net.layerInputSize("1"));
assertEquals(((FeedForwardLayer)net.getLayer(2).conf().getLayer()).getNIn(), net.layerInputSize("2"));
assertEquals(30, net.layerInputSize("3"));
}

@Test
@@ -1217,6 +1217,11 @@ public void testLayerSize(){
assertEquals(0, net.layerSize(1));
assertEquals(30, net.layerSize(2));
assertEquals(13, net.layerSize(3));

assertEquals(3, net.layerInputSize(0));
assertEquals(0, net.layerInputSize(1));
assertEquals(((FeedForwardLayer)net.getLayer(2).conf().getLayer()).getNIn(), net.layerInputSize(2));
assertEquals(30, net.layerInputSize(3));
}


@@ -1246,4 +1251,45 @@ public void testZeroParamNet() throws Exception {
INDArray out2 = net2.output(ds.getFeatures());
assertEquals(out, out2);
}


@Test
public void testInputActivationGradient(){

MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.seed(12345)
.activation(Activation.TANH)
.list()
.layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
.layer(new OutputLayer.Builder().nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build())
.build();

MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();

INDArray in = Nd4j.rand(1, 10);
INDArray label = Nd4j.rand(1, 10);

Pair<Gradient,INDArray> p = net.calculateGradients(in, label, null, null);

//Quick gradient check:
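//Central finite difference: dScore/dIn_i is approximated by (score(in_i + eps) - score(in_i - eps)) / (2 * eps),
//then compared to the analytic input activation gradient using a symmetric relative error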
double eps = 1e-6;
double maxRelError = 1e-5;
for( int i=0; i<10; i++ ){
double orig = in.getDouble(i);
in.putScalar(i, orig + eps);
double scorePlus = net.score(new DataSet(in, label));
in.putScalar(i, orig - eps);
double scoreMinus = net.score(new DataSet(in, label));
in.putScalar(i, orig);

double expGrad = (scorePlus - scoreMinus) / (2.0 * eps);
double actGrad = p.getSecond().getDouble(i);

double relError = (Math.abs(expGrad - actGrad)) / (Math.abs(expGrad) + Math.abs(actGrad));

String str = i + " - " + relError + " - exp=" + expGrad + ", act=" + actGrad;
assertTrue(str, relError < maxRelError);
}
}
}

This file was deleted.

@@ -3994,7 +3994,26 @@ public int layerSize(int layer) {
}

/**
* Return the layer size (number of units) for the specified layer.
* Return the input size (number of inputs) for the specified layer.<br>
* Note that the meaning of the "input size" can depend on the type of layer. For example:<br>
* - DenseLayer, OutputLayer, etc: the feature vector size (nIn configuration option)<br>
* - Recurrent layers: the feature vector size <i>per time step</i> (nIn configuration option)<br>
* - ConvolutionLayer: the number of input channels (nIn configuration option)<br>
* - Subsampling layers, global pooling layers, etc: size of 0 is always returned<br>
*
* @param layer Index of the layer to get the input size of. Must be in range 0 to nLayers-1 inclusive
* @return Input size of the layer
*/
public int layerInputSize(int layer) {
if (layer < 0 || layer >= layers.length) {
throw new IllegalArgumentException("Invalid layer index: " + layer + ". Layer index must be between 0 and "
+ (layers.length - 1) + " inclusive");
}
return layerInputSize(layers[layer].conf().getLayer().getLayerName());
}

/**
* Return the layer size (number of units) for the specified layer.<br>
* Note that the meaning of the "layer size" can depend on the type of layer. For example:<br>
* - DenseLayer, OutputLayer, recurrent layers: number of units (nOut configuration option)<br>
* - ConvolutionLayer: the number of output channels (nOut configuration option)<br>
@@ -4016,6 +4035,30 @@ public int layerSize(String layerName) {
return ffl.getNOut();
}

/**
* Return the input size (number of inputs) for the specified layer.<br>
* Note that the meaning of the "input size" can depend on the type of layer. For example:<br>
* - DenseLayer, OutputLayer, etc: the feature vector size (nIn configuration option)<br>
* - Recurrent layers: the feature vector size <i>per time step</i> (nIn configuration option)<br>
* - ConvolutionLayer: the number of input channels (nIn configuration option)<br>
* - Subsampling layers, global pooling layers, etc: size of 0 is always returned<br>
*
* @param layerName Name of the layer to get the input size of
* @return Input size of the layer
*/
public int layerInputSize(String layerName) {
Layer l = getLayer(layerName);
if(l == null){
throw new IllegalArgumentException("No layer with name \"" + layerName + "\" exists");
}
org.deeplearning4j.nn.conf.layers.Layer conf = l.conf().getLayer();
if (conf == null || !(conf instanceof FeedForwardLayer)) {
return 0;
}
FeedForwardLayer ffl = (FeedForwardLayer) conf;
return ffl.getNIn();
}
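
A minimal usage sketch of the new layerInputSize methods (illustrative only, not from this commit; the vertex names "0"/"1", layer sizes, and MSE output layer are assumptions mirroring the updated testLayerSize test, with imports as used there):

ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
        .activation(Activation.TANH)
        .graphBuilder()
        .addInputs("in")
        .addLayer("0", new DenseLayer.Builder().nIn(3).nOut(6).build(), "in")
        .addLayer("1", new OutputLayer.Builder().nIn(6).nOut(2)
                .lossFunction(LossFunctions.LossFunction.MSE).build(), "0")
        .setOutputs("1")
        .build();
ComputationGraph net = new ComputationGraph(conf);
net.init();
int in0 = net.layerInputSize("0");   //3: nIn of the DenseLayer
int in1 = net.layerInputSize(1);     //6: nIn of the OutputLayer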

/**
* Indicates whether some other object is "equal to" this one.
* <p>
@@ -18,7 +18,6 @@

package org.deeplearning4j.nn.layers;

import com.google.common.base.Preconditions;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.api.layers.IOutputLayer;
@@ -27,6 +26,7 @@
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.deeplearning4j.optimize.Solver;
import org.nd4j.base.Preconditions;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.api.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
@@ -1449,16 +1449,80 @@ public void fit(DataSetIterator iterator) {
incrementEpochCount();
}

/**
* Calculate parameter gradients and input activation gradients given the input and labels
*
* @param features Features for gradient calculation
* @param label Labels for gradient
* @param fMask Features mask array (may be null)
* @param labelMask Label mask array (may be null)
* @return A pair: parameter gradients (as a Gradient object) and the input activation gradient array
*/
public Pair<Gradient,INDArray> calculateGradients(@NonNull INDArray features, @NonNull INDArray label,
INDArray fMask, INDArray labelMask){
setInput(features);
setLabels(label);
setLayerMaskArrays(fMask, labelMask);

LayerWorkspaceMgr mgr;
if(layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE){
mgr = LayerWorkspaceMgr.noWorkspaces();
} else {
mgr = LayerWorkspaceMgr.builder()
.with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG)
.with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG)
.with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG)
.with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG)
.with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG)
.with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG)
.build();

if(layerWiseConfigurations.getCacheMode() != null){
//For now: store cache mode activations in activations workspace
mgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG);
}
}

//Calculate activations (which are stored in each layer, and used in backprop)
try(MemoryWorkspace ws = mgr.notifyScopeEntered(ArrayType.ACTIVATIONS)) {
//First: do a feed-forward through the network
//Note that we don't actually need to do the full forward pass through the output layer right now; but we do
// need the input to the output layer to be set (such that backprop can be done)
List<INDArray> activations = ffToLayerActivationsInWs(layers.length - 2, FwdPassType.STANDARD, false, input, mask, fMask);
if (!trainingListeners.isEmpty()) {
//TODO: We possibly do want output layer activations in some cases here...
for (TrainingListener tl : trainingListeners) {
tl.onForwardPass(this, activations);
}
}
INDArray inputToOutputLayer = activations.get(activations.size() - 1);
if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null) {
inputToOutputLayer = layerWiseConfigurations.getInputPreProcess(layers.length - 1)
.preProcess(inputToOutputLayer, getInputMiniBatchSize(), mgr);
//Validate activations location
}
getOutputLayer().setInput(inputToOutputLayer, mgr);

Pair<Gradient,INDArray> p = calcBackpropGradients(null, true, false, true);
if(p.getSecond() != null){
p.setSecond( p.getSecond().detach());
}
return p;
}
}
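
A hedged usage sketch for calculateGradients (illustrative only, not from this commit; it reuses the small 10-in/10-out configuration from the new testInputActivationGradient test above):

MultiLayerNetwork net = new MultiLayerNetwork(conf);   //conf built as in testInputActivationGradient
net.init();
INDArray in = Nd4j.rand(1, 10);
INDArray label = Nd4j.rand(1, 10);
Pair<Gradient, INDArray> p = net.calculateGradients(in, label, null, null);
Gradient paramGrads = p.getFirst();      //parameter gradients
INDArray inputActGrad = p.getSecond();   //dL/dInput, detached, same shape as 'in'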

/** Calculate gradients and errors. Used in two places:
* (a) backprop (for standard multi layer network learning)
* (b) backpropGradient (layer method, for when MultiLayerNetwork is used as a layer)
* @param epsilon Errors (technically errors .* activations). Not used if withOutputLayer = true
* @param withOutputLayer if true: assume last layer is output layer, and calculate errors based on labels. In this
* case, the epsilon input is not used (may/should be null).
* If false: calculate backprop gradients
* @param returnInputActGrad If true: return the input activation gradients (detached). If false: don't return them
* @return Gradients and the error (epsilon) at the input
*/
protected Pair<Gradient, INDArray> calcBackpropGradients(INDArray epsilon, boolean withOutputLayer, boolean tbptt) {
protected Pair<Gradient, INDArray> calcBackpropGradients(INDArray epsilon, boolean withOutputLayer, boolean tbptt,
boolean returnInputActGrad) {
if (flattenedGradients == null) {
initGradientsView();
}
@@ -1602,6 +1666,14 @@ protected Pair<Gradient, INDArray> calcBackpropGradients(INDArray epsilon, boole
}
}

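//At the input layer (i == 0): keep the input activation gradient only if it was requested,
// detaching it from the workspace so it stays valid after backprop returns; otherwise drop it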
if(i == 0 ){
if(returnInputActGrad && currPair.getSecond() != null){
currPair.setSecond(currPair.getSecond().detach());
} else {
currPair.setSecond(null);
}
}

if(wsActGradCloseNext != null){
wsActGradCloseNext.close();
}
@@ -2336,7 +2408,7 @@ public void computeGradientAndScore() {
}
getOutputLayer().setInput(inputToOutputLayer, mgr);
//Then: compute gradients
Pair<Gradient, INDArray> pair = calcBackpropGradients(null, true, false);
Pair<Gradient, INDArray> pair = calcBackpropGradients(null, true, false, false);
this.gradient = (pair == null ? null : pair.getFirst());

//Calculate score
@@ -2577,7 +2649,7 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspac
if (getOutputLayer() instanceof IOutputLayer)
throw new UnsupportedOperationException("Cannot calculate gradients based on epsilon with OutputLayer");

return calcBackpropGradients(epsilon, false, false);
return calcBackpropGradients(epsilon, false, false, true);
}

@Override
@@ -3297,7 +3369,7 @@ public void setLearningRate(int layerNumber, ISchedule newLr){
}

/**
* Return the layer size (number of units) for the specified layer.
* Return the layer size (number of units) for the specified layer.<br>
* Note that the meaning of the "layer size" can depend on the type of layer. For example:<br>
* - DenseLayer, OutputLayer, recurrent layers: number of units (nOut configuration option)<br>
* - ConvolutionLayer: the number of output channels (nOut configuration option)<br>
@@ -3319,6 +3391,30 @@ public int layerSize(int layer) {
return ffl.getNOut();
}

/**
* Return the input size (number of inputs) for the specified layer.<br>
* Note that the meaning of the "input size" can depend on the type of layer. For example:<br>
* - DenseLayer, OutputLayer, etc: the feature vector size (nIn configuration option)<br>
* - Recurrent layers: the feature vector size <i>per time step</i> (nIn configuration option)<br>
* - ConvolutionLayer: the number of input channels (nIn configuration option)<br>
* - Subsampling layers, global pooling layers, etc: size of 0 is always returned<br>
*
* @param layer Index of the layer to get the input size of. Must be in range 0 to nLayers-1 inclusive
* @return Input size of the layer
*/
public int layerInputSize(int layer) {
if (layer < 0 || layer >= layers.length) {
throw new IllegalArgumentException("Invalid layer index: " + layer + ". Layer index must be between 0 and "
+ (layers.length - 1) + " inclusive");
}
org.deeplearning4j.nn.conf.layers.Layer conf = layers[layer].conf().getLayer();
if (conf == null || !(conf instanceof FeedForwardLayer)) {
return 0;
}
FeedForwardLayer ffl = (FeedForwardLayer) conf;
return ffl.getNIn();
}
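
A brief usage sketch (illustrative only, not from this commit; 'net' and the expected values follow the updated testLayerSize in MultiLayerNetworkTest above):

int nIn0 = net.layerInputSize(0);   //3: nIn of the first layer
int nIn1 = net.layerInputSize(1);   //0: layer 1 has no nIn (not a FeedForwardLayer)
int nIn3 = net.layerInputSize(3);   //30: nIn of the output layer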

/**
* Indicates whether some other object is "equal to" this one.
* <p>
