
New functionality (epoch tracking and constraints) #3957

Merged
merged 9 commits on Aug 31, 2017
@@ -299,6 +299,11 @@ public void setParam(String key, INDArray val) {
    @Override
    public void clear() {}

    @Override
    public void applyConstraints(int iteration, int epoch) {
        //No op
    }

    /* compute the gradient given the current solution, the probabilities and the constant */
    protected Pair<Double, INDArray> gradient(INDArray p) {
        throw new UnsupportedOperationException();
@@ -0,0 +1,102 @@
package org.deeplearning4j.nn.conf.constraints;

import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.constraint.MaxNormConstraint;
import org.deeplearning4j.nn.conf.constraint.MinMaxNormConstraint;
import org.deeplearning4j.nn.conf.constraint.NonNegativeConstraint;
import org.deeplearning4j.nn.conf.constraint.UnitNormConstraint;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.util.ModelSerializer;
import org.junit.Test;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.Collections;
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

public class TestConstraints {

    @Test
    public void testConstraints() throws Exception {

        LayerConstraint[] constraints = new LayerConstraint[]{
                new MaxNormConstraint(0.5, 1),
                new MinMaxNormConstraint(0.3, 0.4, 1),
                new NonNegativeConstraint(),
                new UnitNormConstraint(1)
        };

        for (LayerConstraint lc : constraints) {

            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .constraints(lc)
                    .learningRate(0.0)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 5))
                    .list()
                    .layer(new DenseLayer.Builder().nIn(12).nOut(10).build())
                    .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build())
                    .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            List<LayerConstraint> exp = Collections.singletonList(lc.clone());
            assertEquals(exp, net.getLayer(0).conf().getLayer().getConstraints());
            assertEquals(exp, net.getLayer(1).conf().getLayer().getConstraints());

            INDArray input = Nd4j.rand(3, 12);
            INDArray labels = Nd4j.rand(3, 8);

            net.fit(input, labels);

            INDArray w0 = net.getParam("0_W");
            INDArray b0 = net.getParam("0_b");
            INDArray w1 = net.getParam("1_W");
            INDArray b1 = net.getParam("1_b");

            if (lc instanceof MaxNormConstraint) {
                assertTrue(w0.norm2(1).maxNumber().doubleValue() <= 0.5);
                assertTrue(w1.norm2(1).maxNumber().doubleValue() <= 0.5);
            } else if (lc instanceof MinMaxNormConstraint) {
                assertTrue(w0.norm2(1).minNumber().doubleValue() >= 0.3);
                assertTrue(w0.norm2(1).maxNumber().doubleValue() <= 0.4);
                assertTrue(w1.norm2(1).minNumber().doubleValue() >= 0.3);
                assertTrue(w1.norm2(1).maxNumber().doubleValue() <= 0.4);
            } else if (lc instanceof NonNegativeConstraint) {
                assertTrue(w0.minNumber().doubleValue() >= 0.0);
            } else if (lc instanceof UnitNormConstraint) {
                assertEquals(w0.norm2(1).minNumber().doubleValue(), 1.0, 1e-6);
                assertEquals(w0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
                assertEquals(w1.norm2(1).minNumber().doubleValue(), 1.0, 1e-6);
                assertEquals(w1.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
            }

            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            ModelSerializer.writeModel(net, baos, true);
            byte[] bytes = baos.toByteArray();

            ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
            MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);

            assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations());
            assertEquals(net.params(), restored.params());
        }
    }
}
@@ -1079,6 +1079,42 @@ public void testVertexAsOutput(){
        assertNotNull(out[0]);

        assertArrayEquals(new int[]{minibatch, 1, 36, 48}, out[0].shape());
    }

    @Test
    public void testEpochCounter() throws Exception {

        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .graphBuilder()
                .addInputs("in")
                .addLayer("out", new OutputLayer.Builder().nIn(4).nOut(3).build(), "in")
                .setOutputs("out")
                .build();

        ComputationGraph net = new ComputationGraph(conf);
        net.init();

        assertEquals(0, net.getConfiguration().getEpochCount());

        DataSetIterator iter = new IrisDataSetIterator(150, 150);

        for (int i = 0; i < 4; i++) {
            assertEquals(i, net.getConfiguration().getEpochCount());
            net.fit(iter);
            assertEquals(i + 1, net.getConfiguration().getEpochCount());
        }

        assertEquals(4, net.getConfiguration().getEpochCount());

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ModelSerializer.writeModel(net, baos, true);
        byte[] bytes = baos.toByteArray();

        ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
        ComputationGraph restored = ModelSerializer.restoreComputationGraph(bais, true);
        assertEquals(4, restored.getConfiguration().getEpochCount());
    }
}
@@ -64,6 +64,7 @@
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.*;

import static org.junit.Assert.*;
@@ -1151,4 +1152,40 @@ public void testCompareLayerMethods(){

        assertEquals(conf1, conf2);
    }


    @Test
    public void testEpochCounter() throws Exception {

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .list()
                .layer(new OutputLayer.Builder().nIn(4).nOut(3).build())
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        assertEquals(0, net.getLayerWiseConfigurations().getEpochCount());

        DataSetIterator iter = new IrisDataSetIterator(150, 150);

        for (int i = 0; i < 4; i++) {
            assertEquals(i, net.getLayerWiseConfigurations().getEpochCount());
            net.fit(iter);
            assertEquals(i + 1, net.getLayerWiseConfigurations().getEpochCount());
        }

        assertEquals(4, net.getLayerWiseConfigurations().getEpochCount());

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ModelSerializer.writeModel(net, baos, true);
        byte[] bytes = baos.toByteArray();

        ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
        MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
        assertEquals(4, restored.getLayerWiseConfigurations().getEpochCount());
    }
}
@@ -1088,5 +1088,10 @@ public Pair<INDArray, MaskState> feedForwardMaskArray(INDArray maskArray, MaskSt
        public INDArray getGradientsViewArray() {
            return gradientView;
        }

        @Override
        public void applyConstraints(int iteration, int epoch) {
            //No op
        }
    }
}
@@ -264,4 +264,10 @@ public interface Model {
     * Clear input
     */
    void clear();

    /**
     * Apply any constraints to the model's parameters, given the current iteration and epoch counts
     */
    void applyConstraints(int iteration, int epoch);
}
@@ -34,6 +34,10 @@ public interface ParamInitializer {

    int numParams(org.deeplearning4j.nn.conf.layers.Layer layer);

    /**
     * Is the specified parameter key a weight parameter?
     */
    boolean isWeightParam(String key);

    /**
     * Is the specified parameter key a bias parameter?
     */
    boolean isBiasParam(String key);

    /**
     * Initialize the parameters
     *
@@ -0,0 +1,15 @@
package org.deeplearning4j.nn.api.layers;

import org.deeplearning4j.nn.api.Layer;
import org.nd4j.shade.jackson.annotation.JsonTypeInfo;

import java.io.Serializable;

@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class")
public interface LayerConstraint extends Cloneable, Serializable {

    void applyConstraint(Layer layer, int iteration, int epoch);

I see where the call would come in the optimizer, but again it seems dangerous that this could be applied less than once (or more than once, depending on the constraint) per pass. Now that we're tracking epochs, could we add some epoch checking to make sure two successive calls are idempotent, and log a warning if the constraint is applied less than once per epoch?

Contributor Author
The way I'll be implementing it is that it'll get called once per iteration, after updates have been applied. It won't be possible to apply it more than once per iteration (and even if it were, all of the implementations give the same result with multiple sequential applications).
The epoch (and iteration) number is just informational, to allow flexibility for users to have behaviour that depends on the number of epochs passed.


    LayerConstraint clone();

}
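To illustrate the once-per-iteration, idempotent behaviour discussed in the comments above, here is a minimal sketch of a constraint implementation. It is illustrative only (the class name is hypothetical and not part of this PR); it assumes Layer exposes paramTable() returning a map of parameter name to INDArray, and uses ND4J's BooleanIndexing to clamp negative entries in place.

import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.indexing.BooleanIndexing;
import org.nd4j.linalg.indexing.conditions.Conditions;

public class NonNegativeSketch implements LayerConstraint {

    @Override
    public void applyConstraint(Layer layer, int iteration, int epoch) {
        //Clamp all negative parameter entries to 0.0, in place.
        //Idempotent: applying this twice in a row gives the same result as applying it once.
        //The iteration/epoch arguments are informational and could drive epoch-dependent behaviour.
        for (INDArray param : layer.paramTable().values()) {
            BooleanIndexing.replaceWhere(param, 0.0, Conditions.lessThan(0.0));
        }
    }

    @Override
    public LayerConstraint clone() {
        return new NonNegativeSketch();
    }
}

Since clamping twice yields the same array as clamping once, an accidental re-application within an iteration would be harmless, as noted in the reply above.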
@@ -97,6 +97,9 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable {
    // for Spark and model serialization
    protected int iterationCount = 0;

    //Counter for the number of epochs completed so far. Used for per-epoch schedules
    protected int epochCount = 0;

    /**
     * @return JSON representation of configuration
@@ -79,6 +79,9 @@ public class MultiLayerConfiguration implements Serializable, Cloneable {
    // for Spark and model serialization
    protected int iterationCount = 0;

    //Counter for the number of epochs completed so far. Used for per-epoch schedules
    protected int epochCount = 0;

    /**
     *
     * @return JSON representation of NN configuration
@@ -25,6 +25,7 @@
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.ClassUtils;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.distribution.Distribution;
import org.deeplearning4j.nn.conf.graph.GraphVertex;
import org.deeplearning4j.nn.conf.inputs.InputType;
@@ -120,6 +121,9 @@ public class NeuralNetConfiguration implements Serializable, Cloneable {
    // for Spark and model serialization
    protected int iterationCount = 0;

    //Counter for the number of epochs completed so far. Used for per-epoch schedules
    protected int epochCount = 0;

    private static ObjectMapper mapper = initMapper();
    private static final ObjectMapper mapperYaml = initMapperYaml();
    private static Set<Class<?>> subtypesClassCache = null;
@@ -631,6 +635,7 @@ public static class Builder implements Cloneable {
        protected double lrPolicySteps = Double.NaN;
        protected double lrPolicyPower = Double.NaN;
        protected boolean pretrain = false;
        protected List<LayerConstraint> constraints = null;

        protected WorkspaceMode trainingWorkspaceMode = WorkspaceMode.NONE;
        protected WorkspaceMode inferenceWorkspaceMode = WorkspaceMode.SEPARATE;
@@ -1222,11 +1227,39 @@ public Builder lrPolicyPower(double lrPolicyPower) {
            return this;
        }

        /**
         * Sets the convolution mode for convolutional layers, which impacts padding and output sizes.
         * See {@link ConvolutionMode} for details. Defaults to ConvolutionMode.Truncate
         * @param convolutionMode Convolution mode to use
         */
        public Builder convolutionMode(ConvolutionMode convolutionMode) {
            this.convolutionMode = convolutionMode;
            return this;
        }

        /**
         * Set constraints to be applied to all layers. Default: no constraints.<br>
         * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization,
         * etc). These constraints are applied at each iteration, after the parameters have been updated.
         *
         * @param constraints Constraints to apply to all layers
         */
        public Builder constraints(LayerConstraint... constraints) {
            return constraints(Arrays.asList(constraints));
        }

        /**
         * Set constraints to be applied to all layers. Default: no constraints.<br>
         * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization,
         * etc). These constraints are applied at each iteration, after the parameters have been updated; see the
         * usage sketch below this method.
         *
         * @param constraints Constraints to apply to all layers
         */
        public Builder constraints(List<LayerConstraint> constraints) {
            this.constraints = constraints;
            return this;
        }
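        // Usage sketch (illustrative only, not part of this diff): applying a max-norm constraint
        // to all layers via the new builder methods. Assumes the constraint classes added in this
        // PR, e.g. MaxNormConstraint(maxNorm, dimension); the layer sizes here are arbitrary.
        //
        //     MultiLayerConfiguration c = new NeuralNetConfiguration.Builder()
        //             .constraints(new MaxNormConstraint(0.5, 1))
        //             .list()
        //             .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
        //             .layer(new OutputLayer.Builder()
        //                     .lossFunction(LossFunctions.LossFunction.MSE)
        //                     .nIn(10).nOut(3).build())
        //             .build();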

        private void learningRateValidation(String layerName) {
            if (learningRatePolicy != LearningRatePolicy.None && Double.isNaN(lrPolicyDecayRate)) {
                //LR policy, if used, should have a decay rate. 2 exceptions: Map for schedule, and Poly + power param
@@ -1335,7 +1368,7 @@ private void configureLayer(Layer layer) {
                }
            }
            LayerValidation.generalValidation(layerName, layer, useDropConnect, dropOut, l2, l2Bias,
-                           l1, l1Bias, dist);
+                           l1, l1Bias, dist, constraints);
        }

        private void copyConfigToLayer(String layerName, Layer layer) {