Merge pull request #5018 from deeplearning4j/ab_miscfixes
Various fixes and features
AlexDBlack committed May 1, 2018
2 parents 355747d + bb7bc6b commit 4013ccc
Showing 15 changed files with 317 additions and 438 deletions.
@@ -3,6 +3,8 @@
import org.deeplearning4j.api.storage.StatsStorageRouter;
import org.deeplearning4j.optimize.api.TrainingListener;

import java.io.Serializable;

/**
* An extension of the {@link TrainingListener} interface for those listeners that pass data off to a
* {@link org.deeplearning4j.api.storage.StatsStorageRouter} instance.
@@ -13,7 +15,7 @@
*
* @author Alex Black
*/
public interface RoutingIterationListener extends TrainingListener, Cloneable {
public interface RoutingIterationListener extends TrainingListener, Cloneable, Serializable {

void setStorageRouter(StatsStorageRouter router);

@@ -1340,10 +1340,20 @@ public void testLayerSize(){
assertEquals(30, net.layerSize(2));
assertEquals(13, net.layerSize(3));

assertEquals(3, net.layerInputSize(0));
assertEquals(0, net.layerInputSize(1));
assertEquals(((FeedForwardLayer)net.getLayer(2).conf().getLayer()).getNIn(), net.layerInputSize(2));
assertEquals(30, net.layerInputSize(3));

assertEquals(6, net.layerSize("0"));
assertEquals(0, net.layerSize("1"));
assertEquals(30, net.layerSize("2"));
assertEquals(13, net.layerSize("3"));

assertEquals(3, net.layerInputSize("0"));
assertEquals(0, net.layerInputSize("1"));
assertEquals(((FeedForwardLayer)net.getLayer(2).conf().getLayer()).getNIn(), net.layerInputSize("2"));
assertEquals(30, net.layerInputSize("3"));
}

@Test
@@ -1217,6 +1217,11 @@ public void testLayerSize(){
assertEquals(0, net.layerSize(1));
assertEquals(30, net.layerSize(2));
assertEquals(13, net.layerSize(3));

assertEquals(3, net.layerInputSize(0));
assertEquals(0, net.layerInputSize(1));
assertEquals(((FeedForwardLayer)net.getLayer(2).conf().getLayer()).getNIn(), net.layerInputSize(2));
assertEquals(30, net.layerInputSize(3));
}


@@ -1246,4 +1251,45 @@ public void testZeroParamNet() throws Exception {
INDArray out2 = net2.output(ds.getFeatures());
assertEquals(out, out2);
}


@Test
public void testInputActivationGradient(){

MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.seed(12345)
.activation(Activation.TANH)
.list()
.layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
.layer(new OutputLayer.Builder().nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build())
.build();

MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();

INDArray in = Nd4j.rand(1, 10);
INDArray label = Nd4j.rand(1, 10);

Pair<Gradient,INDArray> p = net.calculateGradients(in, label, null, null);

//Quick gradient check:
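//Central finite difference: dScore/dIn_i is approximated by (score(in_i + eps) - score(in_i - eps)) / (2 * eps),
//then compared to the analytic input activation gradient using a symmetric relative error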
double eps = 1e-6;
double maxRelError = 1e-5;
for( int i=0; i<10; i++ ){
double orig = in.getDouble(i);
in.putScalar(i, orig + eps);
double scorePlus = net.score(new DataSet(in, label));
in.putScalar(i, orig - eps);
double scoreMinus = net.score(new DataSet(in, label));
in.putScalar(i, orig);

double expGrad = (scorePlus - scoreMinus) / (2.0 * eps);
double actGrad = p.getSecond().getDouble(i);

double relError = (Math.abs(expGrad - actGrad)) / (Math.abs(expGrad) + Math.abs(actGrad));

String str = i + " - " + relError + " - exp=" + expGrad + ", act=" + actGrad;
assertTrue(str, relError < maxRelError);
}
}
}

This file was deleted.

@@ -3994,7 +3994,26 @@ public int layerSize(int layer) {
}

/**
* Return the layer size (number of units) for the specified layer.
* Return the input size (number of inputs) for the specified layer.<br>
* Note that the meaning of the "input size" can depend on the type of layer. For example:<br>
* - DenseLayer, OutputLayer, etc: the feature vector size (nIn configuration option)<br>
* - Recurrent layers: the feature vector size <i>per time step</i> (nIn configuration option)<br>
* - ConvolutionLayer: the number of input channels (nIn configuration option)<br>
* - Subsampling layers, global pooling layers, etc: size of 0 is always returned<br>
*
* @param layer Index of the layer to get the input size of. Must be in range 0 to nLayers-1 inclusive
* @return Input size of the layer
*/
public int layerInputSize(int layer) {
if (layer < 0 || layer >= layers.length) {
throw new IllegalArgumentException("Invalid layer index: " + layer + ". Layer index must be between 0 and "
+ (layers.length - 1) + " inclusive");
}
return layerInputSize(layers[layer].conf().getLayer().getLayerName());
}

/**
* Return the layer size (number of units) for the specified layer.<br>
* Note that the meaning of the "layer size" can depend on the type of layer. For example:<br>
* - DenseLayer, OutputLayer, recurrent layers: number of units (nOut configuration option)<br>
* - ConvolutionLayer: the number of output channels (nOut configuration option)<br>
@@ -4016,6 +4035,30 @@ public int layerSize(String layerName) {
return ffl.getNOut();
}

/**
* Return the input size (number of inputs) for the specified layer.<br>
* Note that the meaning of the "input size" can depend on the type of layer. For example:<br>
* - DenseLayer, OutputLayer, etc: the feature vector size (nIn configuration option)<br>
* - Recurrent layers: the feature vector size <i>per time step</i> (nIn configuration option)<br>
* - ConvolutionLayer: the number of input channels (nIn configuration option)<br>
* - Subsampling layers, global pooling layers, etc: size of 0 is always returned<br>
*
* @param layerName Name of the layer to get the input size of
* @return Input size of the layer
*/
public int layerInputSize(String layerName) {
Layer l = getLayer(layerName);
if(l == null){
throw new IllegalArgumentException("No layer with name \"" + layerName + "\" exists");
}
org.deeplearning4j.nn.conf.layers.Layer conf = l.conf().getLayer();
if (conf == null || !(conf instanceof FeedForwardLayer)) {
return 0;
}
FeedForwardLayer ffl = (FeedForwardLayer) conf;
return ffl.getNIn();
}
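
A minimal usage sketch of the new layerInputSize methods (illustrative only, not from this commit; the vertex names "0"/"1", layer sizes, and MSE output layer are assumptions mirroring the updated testLayerSize test, with imports as used there):

ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
        .activation(Activation.TANH)
        .graphBuilder()
        .addInputs("in")
        .addLayer("0", new DenseLayer.Builder().nIn(3).nOut(6).build(), "in")
        .addLayer("1", new OutputLayer.Builder().nIn(6).nOut(2)
                .lossFunction(LossFunctions.LossFunction.MSE).build(), "0")
        .setOutputs("1")
        .build();
ComputationGraph net = new ComputationGraph(conf);
net.init();
int in0 = net.layerInputSize("0");   //3: nIn of the DenseLayer
int in1 = net.layerInputSize(1);     //6: nIn of the OutputLayer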

/**
* Indicates whether some other object is "equal to" this one.
* <p>
@@ -18,7 +18,6 @@

package org.deeplearning4j.nn.layers;

import com.google.common.base.Preconditions;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.api.layers.IOutputLayer;
@@ -27,6 +26,7 @@
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.deeplearning4j.optimize.Solver;
import org.nd4j.base.Preconditions;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.api.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
@@ -1449,16 +1449,80 @@ public void fit(DataSetIterator iterator) {
incrementEpochCount();
}

/**
* Calculate parameter gradients and input activation gradients given the input and labels
*
* @param features Features for gradient calculation
* @param label Labels for gradient
* @param fMask Features mask array (may be null)
* @param labelMask Label mask array (may be null)
* @return A pair: parameter gradients (as a Gradient object) and the input activation gradient array
*/
public Pair<Gradient,INDArray> calculateGradients(@NonNull INDArray features, @NonNull INDArray label,
INDArray fMask, INDArray labelMask){
setInput(features);
setLabels(label);
setLayerMaskArrays(fMask, labelMask);

LayerWorkspaceMgr mgr;
if(layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE){
mgr = LayerWorkspaceMgr.noWorkspaces();
} else {
mgr = LayerWorkspaceMgr.builder()
.with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG)
.with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG)
.with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG)
.with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG)
.with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG)
.with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG)
.build();

if(layerWiseConfigurations.getCacheMode() != null){
//For now: store cache mode activations in activations workspace
mgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG);
}
}

//Calculate activations (which are stored in each layer, and used in backprop)
try(MemoryWorkspace ws = mgr.notifyScopeEntered(ArrayType.ACTIVATIONS)) {
//First: do a feed-forward through the network
//Note that we don't actually need to do the full forward pass through the output layer right now; but we do
// need the input to the output layer to be set (such that backprop can be done)
List<INDArray> activations = ffToLayerActivationsInWs(layers.length - 2, FwdPassType.STANDARD, false, input, mask, fMask);
if (!trainingListeners.isEmpty()) {
//TODO: We possibly do want output layer activations in some cases here...
for (TrainingListener tl : trainingListeners) {
tl.onForwardPass(this, activations);
}
}
INDArray inputToOutputLayer = activations.get(activations.size() - 1);
if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null) {
inputToOutputLayer = layerWiseConfigurations.getInputPreProcess(layers.length - 1)
.preProcess(inputToOutputLayer, getInputMiniBatchSize(), mgr);
//Validate activations location
}
getOutputLayer().setInput(inputToOutputLayer, mgr);

Pair<Gradient,INDArray> p = calcBackpropGradients(null, true, false, true);
if(p.getSecond() != null){
p.setSecond( p.getSecond().detach());
}
return p;
}
}
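
A hedged usage sketch for calculateGradients (illustrative only, not from this commit; it reuses the small 10-in/10-out configuration from the new testInputActivationGradient test above):

MultiLayerNetwork net = new MultiLayerNetwork(conf);   //conf built as in testInputActivationGradient
net.init();
INDArray in = Nd4j.rand(1, 10);
INDArray label = Nd4j.rand(1, 10);
Pair<Gradient, INDArray> p = net.calculateGradients(in, label, null, null);
Gradient paramGrads = p.getFirst();      //parameter gradients
INDArray inputActGrad = p.getSecond();   //dL/dInput, detached, same shape as 'in'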

/** Calculate gradients and errors. Used in two places:
* (a) backprop (for standard multi layer network learning)
* (b) backpropGradient (layer method, for when MultiLayerNetwork is used as a layer)
* @param epsilon Errors (technically errors .* activations). Not used if withOutputLayer = true
* @param withOutputLayer if true: assume last layer is output layer, and calculate errors based on labels. In this
* case, the epsilon input is not used (may/should be null).
* If false: calculate backprop gradients
* @param returnInputActGrad If true: return the input activation gradients (detached). If false: don't return them
* @return Gradients and the error (epsilon) at the input
*/
protected Pair<Gradient, INDArray> calcBackpropGradients(INDArray epsilon, boolean withOutputLayer, boolean tbptt) {
protected Pair<Gradient, INDArray> calcBackpropGradients(INDArray epsilon, boolean withOutputLayer, boolean tbptt,
boolean returnInputActGrad) {
if (flattenedGradients == null) {
initGradientsView();
}
@@ -1602,6 +1666,14 @@ protected Pair<Gradient, INDArray> calcBackpropGradients(INDArray epsilon, boole
}
}

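//At the input layer (i == 0): keep the input activation gradient only if it was requested,
// detaching it from the workspace so it stays valid after backprop returns; otherwise drop it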
if(i == 0 ){
if(returnInputActGrad && currPair.getSecond() != null){
currPair.setSecond(currPair.getSecond().detach());
} else {
currPair.setSecond(null);
}
}

if(wsActGradCloseNext != null){
wsActGradCloseNext.close();
}
@@ -2336,7 +2408,7 @@ public void computeGradientAndScore() {
}
getOutputLayer().setInput(inputToOutputLayer, mgr);
//Then: compute gradients
Pair<Gradient, INDArray> pair = calcBackpropGradients(null, true, false);
Pair<Gradient, INDArray> pair = calcBackpropGradients(null, true, false, false);
this.gradient = (pair == null ? null : pair.getFirst());

//Calculate score
@@ -2577,7 +2649,7 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspac
if (getOutputLayer() instanceof IOutputLayer)
throw new UnsupportedOperationException("Cannot calculate gradients based on epsilon with OutputLayer");

return calcBackpropGradients(epsilon, false, false);
return calcBackpropGradients(epsilon, false, false, true);
}

@Override
@@ -3297,7 +3369,7 @@ public void setLearningRate(int layerNumber, ISchedule newLr){
}

/**
* Return the layer size (number of units) for the specified layer.
* Return the layer size (number of units) for the specified layer.<br>
* Note that the meaning of the "layer size" can depend on the type of layer. For example:<br>
* - DenseLayer, OutputLayer, recurrent layers: number of units (nOut configuration option)<br>
* - ConvolutionLayer: the number of output channels (nOut configuration option)<br>
@@ -3319,6 +3391,30 @@ public int layerSize(int layer) {
return ffl.getNOut();
}

/**
* Return the input size (number of inputs) for the specified layer.<br>
* Note that the meaning of the "input size" can depend on the type of layer. For example:<br>
* - DenseLayer, OutputLayer, etc: the feature vector size (nIn configuration option)<br>
* - Recurrent layers: the feature vector size <i>per time step</i> (nIn configuration option)<br>
* - ConvolutionLayer: the number of input channels (nIn configuration option)<br>
* - Subsampling layers, global pooling layers, etc: size of 0 is always returned<br>
*
* @param layer Index of the layer to get the input size of. Must be in range 0 to nLayers-1 inclusive
* @return Input size of the layer
*/
public int layerInputSize(int layer) {
if (layer < 0 || layer >= layers.length) {
throw new IllegalArgumentException("Invalid layer index: " + layer + ". Layer index must be between 0 and "
+ (layers.length - 1) + " inclusive");
}
org.deeplearning4j.nn.conf.layers.Layer conf = layers[layer].conf().getLayer();
if (conf == null || !(conf instanceof FeedForwardLayer)) {
return 0;
}
FeedForwardLayer ffl = (FeedForwardLayer) conf;
return ffl.getNIn();
}
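
A brief usage sketch (illustrative only, not from this commit; 'net' and the expected values follow the updated testLayerSize in MultiLayerNetworkTest above):

int nIn0 = net.layerInputSize(0);   //3: nIn of the first layer
int nIn1 = net.layerInputSize(1);   //0: layer 1 has no nIn (not a FeedForwardLayer)
int nIn3 = net.layerInputSize(3);   //30: nIn of the output layer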

/**
* Indicates whether some other object is "equal to" this one.
* <p>
