Skip to content

Commit

Permalink
#2866 MultiLayerNetwork: Backprop errors for model interrogation
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexDBlack committed May 1, 2018
1 parent fb09576 commit bb7bc6b
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1251,4 +1251,45 @@ public void testZeroParamNet() throws Exception {
INDArray out2 = net2.output(ds.getFeatures());
assertEquals(out, out2);
}


@Test
public void testInputActivationGradient(){
    //Tests MultiLayerNetwork.calculateGradients: the returned input activation gradients (dL/dInput)
    // are checked numerically via central finite differences on the network score.

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .activation(Activation.TANH)
            .list()
            .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
            .layer(new OutputLayer.Builder().nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build())
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    INDArray in = Nd4j.rand(1, 10);
    INDArray label = Nd4j.rand(1, 10);

    //Analytic gradients: parameter gradients plus input activation gradients (pair second element)
    Pair<Gradient,INDArray> p = net.calculateGradients(in, label, null, null);

    //Quick gradient check via central differences: dL/dx_i ~= (score(x_i+eps) - score(x_i-eps)) / (2*eps)
    double eps = 1e-6;
    double maxRelError = 1e-5;
    for( int i = 0; i < 10; i++ ){
        double orig = in.getDouble(i);
        in.putScalar(i, orig + eps);
        double scorePlus = net.score(new DataSet(in, label));
        in.putScalar(i, orig - eps);
        double scoreMinus = net.score(new DataSet(in, label));
        in.putScalar(i, orig);  //Restore original input value for subsequent iterations

        double expGrad = (scorePlus - scoreMinus) / (2.0 * eps);
        double actGrad = p.getSecond().getDouble(i);

        //Guard against 0/0 -> NaN when both gradients are exactly zero; NaN would (misleadingly) fail the assert
        double denom = Math.abs(expGrad) + Math.abs(actGrad);
        double relError = denom == 0.0 ? 0.0 : Math.abs(expGrad - actGrad) / denom;

        String str = i + " - " + relError + " - exp=" + expGrad + ", act=" + actGrad;
        assertTrue(str, relError < maxRelError);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

package org.deeplearning4j.nn.layers;

import com.google.common.base.Preconditions;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.api.layers.IOutputLayer;
Expand All @@ -27,6 +26,7 @@
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.deeplearning4j.optimize.Solver;
import org.nd4j.base.Preconditions;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.api.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1449,16 +1449,80 @@ public void fit(DataSetIterator iterator) {
incrementEpochCount();
}

/**
* Calculate parameter gradients and input activation gradients given the input and labels.
* Unlike fit()/computeGradientAndScore(), this does NOT update parameters; it only computes and
* returns gradients, including the gradient of the loss w.r.t. the network input (detached, so it
* is safe to use outside any workspace).
* <p>
* NOTE(review): this method sets the network input, labels and mask arrays as a side effect and
* does not clear them afterwards — callers relying on previous network state should be aware.
*
* @param features Features (network input) for gradient calculation
* @param label Labels for gradient calculation
* @param fMask Features mask array (may be null)
* @param labelMask Label mask array (may be null)
* @return A pair of gradient arrays: parameter gradients (in Gradient object) and input activation gradients
*/
public Pair<Gradient,INDArray> calculateGradients(@NonNull INDArray features, @NonNull INDArray label,
INDArray fMask, INDArray labelMask){
setInput(features);
setLabels(label);
setLayerMaskArrays(fMask, labelMask);

//Configure workspaces to match the training workspace mode; NONE disables workspaces entirely
LayerWorkspaceMgr mgr;
if(layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE){
mgr = LayerWorkspaceMgr.noWorkspaces();
} else {
//Activations are kept in one "all layers" workspace (they must survive until backprop);
// working memory uses per-layer scratch workspaces
mgr = LayerWorkspaceMgr.builder()
.with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG)
.with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG)
.with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG)
.with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG)
.with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG)
.with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG)
.build();

if(layerWiseConfigurations.getCacheMode() != null){
//For now: store cache mode activations in activations workspace
mgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG);
}
}

//Calculate activations (which are stored in each layer, and used in backprop)
try(MemoryWorkspace ws = mgr.notifyScopeEntered(ArrayType.ACTIVATIONS)) {
//First: do a feed-forward through the network
//Note that we don't actually need to do the full forward pass through the output layer right now; but we do
// need the input to the output layer to be set (such that backprop can be done)
//NOTE(review): layers.length - 2 assumes at least 2 layers (i.e., a layer before the output layer) — TODO confirm single-layer behavior
List<INDArray> activations = ffToLayerActivationsInWs(layers.length - 2, FwdPassType.STANDARD, false, input, mask, fMask);
if (!trainingListeners.isEmpty()) {
//TODO: We possibly do want output layer activations in some cases here...
for (TrainingListener tl : trainingListeners) {
tl.onForwardPass(this, activations);
}
}
//Feed the last hidden layer's activations (optionally preprocessed) into the output layer manually
INDArray inputToOutputLayer = activations.get(activations.size() - 1);
if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null) {
inputToOutputLayer = layerWiseConfigurations.getInputPreProcess(layers.length - 1)
.preProcess(inputToOutputLayer, getInputMiniBatchSize(), mgr);
//Validate activations location
}
getOutputLayer().setInput(inputToOutputLayer, mgr);

//Backprop with: epsilon=null (output layer computes its own error from labels), withOutputLayer=true,
// tbptt=false, returnInputActGrad=true (we want dL/dInput back)
Pair<Gradient,INDArray> p = calcBackpropGradients(null, true, false, true);
if(p.getSecond() != null){
//Detach the input activation gradient so it remains valid after the workspace closes
p.setSecond( p.getSecond().detach());
}
return p;
}
}

/** Calculate gradients and errors. Used in two places:
* (a) backprop (for standard multi layer network learning)
* (b) backpropGradient (layer method, for when MultiLayerNetwork is used as a layer)
* @param epsilon Errors (technically errors .* activations). Not used if withOutputLayer = true
* @param withOutputLayer if true: assume last layer is output layer, and calculate errors based on labels. In this
* case, the epsilon input is not used (may/should be null).
* If false: calculate backprop gradients
* @param returnInputActGrad If true: return the input activation gradients (detached). False: don't return
* @return Gradients and the error (epsilon) at the input
*/
protected Pair<Gradient, INDArray> calcBackpropGradients(INDArray epsilon, boolean withOutputLayer, boolean tbptt) {
protected Pair<Gradient, INDArray> calcBackpropGradients(INDArray epsilon, boolean withOutputLayer, boolean tbptt,
boolean returnInputActGrad) {
if (flattenedGradients == null) {
initGradientsView();
}
Expand Down Expand Up @@ -1602,6 +1666,14 @@ protected Pair<Gradient, INDArray> calcBackpropGradients(INDArray epsilon, boole
}
}

if(i == 0 ){
if(returnInputActGrad && currPair.getSecond() != null){
currPair.setSecond(currPair.getSecond().detach());
} else {
currPair.setSecond(null);
}
}

if(wsActGradCloseNext != null){
wsActGradCloseNext.close();
}
Expand Down Expand Up @@ -2336,7 +2408,7 @@ public void computeGradientAndScore() {
}
getOutputLayer().setInput(inputToOutputLayer, mgr);
//Then: compute gradients
Pair<Gradient, INDArray> pair = calcBackpropGradients(null, true, false);
Pair<Gradient, INDArray> pair = calcBackpropGradients(null, true, false, false);
this.gradient = (pair == null ? null : pair.getFirst());

//Calculate score
Expand Down Expand Up @@ -2577,7 +2649,7 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspac
if (getOutputLayer() instanceof IOutputLayer)
throw new UnsupportedOperationException("Cannot calculate gradients based on epsilon with OutputLayer");

return calcBackpropGradients(epsilon, false, false);
return calcBackpropGradients(epsilon, false, false, true);
}

@Override
Expand Down

0 comments on commit bb7bc6b

Please sign in to comment.