From 666cc11cbff870bf69641dfcb53a8d289158da5d Mon Sep 17 00:00:00 2001 From: Alex Black Date: Mon, 18 Dec 2017 12:56:33 +1100 Subject: [PATCH 01/34] First steps for samediff layer --- .../samediff/SameDiffTest1.java | 145 ++++++++++++++++ .../layers/samediff/BaseSameDiffLayer.java | 161 ++++++++++++++++++ .../nn/params/SameDiffParamInitializer.java | 63 +++++++ 3 files changed, 369 insertions(+) create mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java new file mode 100644 index 000000000000..b5e8add908aa --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java @@ -0,0 +1,145 @@ +package org.deeplearning4j.samediff; + +import org.junit.Test; +import org.nd4j.autodiff.samediff.SDGraph; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.ops.transforms.Transforms; + +import java.util.HashMap; +import java.util.Map; + +public class SameDiffTest1 { + + @Test + public void test1() { + + SameDiff sd = SameDiff.create(); + + SDVariable input = sd.var("input", new int[]{3,4}); + SDVariable weights = sd.var("weights", new int[]{4,5}); + SDVariable bias = sd.var("bias", new int[]{1,5}); + + SDVariable mmul = sd.mmul("mmul", input, weights); + SDVariable z = mmul.add("z", bias); + SDVariable out = sd.sigmoid("out", z); + + SDGraph g = sd.graph(); + System.out.println(g); + + System.out.println(out); + + + INDArray iInput = Nd4j.rand(3,4); + INDArray iWeights = Nd4j.rand(4,5); + INDArray iBias = Nd4j.rand(1,5); + + INDArray iZ = iInput.mmul(iWeights).addiRowVector(iBias); + INDArray iOut = Transforms.sigmoid(iZ, true); + + Map values = new HashMap<>(); + values.put("input", iInput); + values.put("weights", iWeights); + values.put("bias", iBias); + + INDArray[] outAct = sd.eval(values); + + System.out.println(); + } + + + @Test + public void test2() { + + SameDiff sd = SameDiff.create(); + + SDVariable input = sd.var("input", new int[]{3,4}); + SDVariable weights = sd.var("weights", new int[]{4,5}); + SDVariable bias = sd.var("bias", new int[]{1,5}); + + SDVariable mmul = sd.mmul("mmul", input, weights); + SDVariable z = mmul.add("z", bias); + SDVariable out = sd.sigmoid("out", z); + + SDGraph g = sd.graph(); + System.out.println(g); + + System.out.println(out); + + + INDArray iInput = Nd4j.rand(3,4); + INDArray iWeights = Nd4j.rand(4,5); + INDArray iBias = Nd4j.rand(1,5); + + INDArray iZ = iInput.mmul(iWeights).addiRowVector(iBias); + INDArray iOut = Transforms.sigmoid(iZ, true); + + Map values = new HashMap<>(); + values.put("input", iInput); + values.put("weights", iWeights); + values.put("bias", iBias); + + INDArray[] outAct = sd.eval(values); + + System.out.println(); + } + + @Test + public void test3() { + + SameDiff sd = SameDiff.create(); + + INDArray iInput = Nd4j.rand(3,4); + INDArray iWeights = Nd4j.rand(4,5); + INDArray iBias = Nd4j.rand(1,5); + + SDVariable input = sd.var("input", iInput); + SDVariable weights = sd.var("weights", iWeights); + SDVariable bias = 
sd.var("bias", iBias); + + SDVariable mmul = sd.mmul("mmul", input, weights); + SDVariable z = mmul.add("z", bias); + SDVariable out = sd.sigmoid("out", z); + + + INDArray outAct = sd.execAndEndResult(); + + + + INDArray iZ = iInput.mmul(iWeights).addiRowVector(iBias); + INDArray iOut = Transforms.sigmoid(iZ, true); + + Map values = new HashMap<>(); + values.put("input", iInput); + values.put("weights", iWeights); + values.put("bias", iBias); + + System.out.println(); + } + + + @Test + public void test4() { + + SameDiff sd = SameDiff.create(); + + INDArray iInput = Nd4j.rand(3,4); + INDArray iWeights = Nd4j.rand(4,5); + INDArray iBias = Nd4j.rand(1,5); + + SDVariable input = sd.var("input", iInput); + SDVariable weights = sd.var("weights", iWeights); + SDVariable bias = sd.var("bias", iBias); + + SDVariable mmul = sd.mmul("mmul", input, weights); + SDVariable z = mmul.add("z", bias); + SDVariable out = sd.sigmoid("out", z); + + + INDArray outArr = out.eval(); + + System.out.println(outArr); + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java new file mode 100644 index 000000000000..4391626a2a09 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java @@ -0,0 +1,161 @@ +package org.deeplearning4j.nn.conf.layers.samediff; + +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.params.SameDiffParamInitializer; +import org.deeplearning4j.optimize.api.IterationListener; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.learning.config.IUpdater; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +public abstract class BaseSameDiffLayer extends Layer { + + protected double l1; + protected double l2; + protected double l1Bias; + protected double l2Bias; + protected IUpdater updater; + protected IUpdater biasUpdater; + + + private List paramKeys; + + protected BaseSameDiffLayer(Builder builder){ + super(builder); + this.l1 = builder.l1; + this.l2 = builder.l2; + this.l1Bias = builder.l1Bias; + this.l2Bias = builder.l2Bias; + this.updater = builder.updater; + this.biasUpdater = builder.biasUpdater; + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, + int layerIndex, INDArray layerParamsView, boolean initializeParams) { + return null; + } + + @Override + public ParamInitializer initializer() { + return SameDiffParamInitializer.getInstance(); + } + + @Override + public abstract InputType getOutputType(int layerIndex, InputType inputType); + + @Override + public abstract void setNIn(InputType inputType, boolean override); + + @Override + public abstract InputPreProcessor getPreProcessorForInputType(InputType inputType); + + public abstract List weightKeys(); + + public abstract List biasKeys(); + + @Override + public double getL1ByParam(String paramName) { + return (initializer().isWeightParam(this, paramName) ? l1 : l1Bias); + } + + @Override + public double getL2ByParam(String paramName) { + return (initializer().isWeightParam(this, paramName) ? 
l2 : l2Bias); + } + + @Override + public boolean isPretrainParam(String paramName) { + return false; + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + return new LayerMemoryReport(); //TODO + } + + public List paramKeys(){ + if(paramKeys == null){ + List pk = new ArrayList<>(); + pk.addAll(weightKeys()); + pk.addAll(biasKeys()); + paramKeys = pk; + } + return paramKeys; + } + + + public static abstract class Builder> extends Layer.Builder { + + protected double l1 = Double.NaN; + protected double l2 = Double.NaN; + protected double l1Bias = Double.NaN; + protected double l2Bias = Double.NaN; + protected IUpdater updater = null; + protected IUpdater biasUpdater = null; + + /** + * L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1 regularization + * coefficient for the bias. + */ + public T l1(double l1) { + this.l1 = l1; + return (T) this; + } + + /** + * L2 regularization coefficient (weights only). Use {@link #l2Bias(double)} to configure the l2 regularization + * coefficient for the bias. + */ + public T l2(double l2) { + this.l2 = l2; + return (T) this; + } + + /** + * L1 regularization coefficient for the bias. Default: 0. See also {@link #l1(double)} + */ + public T l1Bias(double l1Bias) { + this.l1Bias = l1Bias; + return (T) this; + } + + /** + * L2 regularization coefficient for the bias. Default: 0. See also {@link #l2(double)} + */ + public T l2Bias(double l2Bias) { + this.l2Bias = l2Bias; + return (T) this; + } + + /** + * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} + * or {@link org.nd4j.linalg.learning.config.Nesterovs} + * + * @param updater Updater to use + */ + public T updater(IUpdater updater) { + this.updater = updater; + return (T) this; + } + + /** + * Gradient updater configuration, for the biases only. 
If not set, biases will use the updater as + * set by {@link #updater(IUpdater)} + * + * @param biasUpdater Updater to use for bias parameters + */ + public T biasUpdater(IUpdater biasUpdater){ + this.biasUpdater = biasUpdater; + return (T) this; + } + + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java new file mode 100644 index 000000000000..b42d9fb1cd78 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java @@ -0,0 +1,63 @@ +package org.deeplearning4j.nn.params; + +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.Layer; +import org.nd4j.linalg.api.ndarray.INDArray; + +import java.util.List; +import java.util.Map; + +public class SameDiffParamInitializer implements ParamInitializer { + + private static final SameDiffParamInitializer INSTANCE = new SameDiffParamInitializer(); + + public static SameDiffParamInitializer getInstance() { + return INSTANCE; + } + + @Override + public int numParams(NeuralNetConfiguration conf) { + return 0; + } + + @Override + public int numParams(Layer layer) { + return 0; + } + + @Override + public List paramKeys(Layer layer) { + return null; + } + + @Override + public List weightKeys(Layer layer) { + return null; + } + + @Override + public List biasKeys(Layer layer) { + return null; + } + + @Override + public boolean isWeightParam(Layer layer, String key) { + return false; + } + + @Override + public boolean isBiasParam(Layer layer, String key) { + return false; + } + + @Override + public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { + return null; + } + + @Override + public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + return null; + } +} From 2a76f5fbfb1b6f4e0ec42a9aff9749e6fe383b76 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Mon, 18 Dec 2017 13:37:40 +1100 Subject: [PATCH 02/34] First pass on layer implementation --- .../layers/samediff/BaseSameDiffLayer.java | 36 +++-- .../nn/layers/samediff/SameDiffLayer.java | 126 ++++++++++++++++++ 2 files changed, 151 insertions(+), 11 deletions(-) create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java index 4391626a2a09..91ee375b1213 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java @@ -6,14 +6,18 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Layer; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.layers.samediff.SameDiffLayer; import org.deeplearning4j.nn.params.SameDiffParamInitializer; import org.deeplearning4j.optimize.api.IterationListener; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.config.IUpdater; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import 
java.util.Map; public abstract class BaseSameDiffLayer extends Layer { @@ -37,17 +41,6 @@ protected BaseSameDiffLayer(Builder builder){ this.biasUpdater = builder.biasUpdater; } - @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams) { - return null; - } - - @Override - public ParamInitializer initializer() { - return SameDiffParamInitializer.getInstance(); - } - @Override public abstract InputType getOutputType(int layerIndex, InputType inputType); @@ -61,6 +54,27 @@ public ParamInitializer initializer() { public abstract List biasKeys(); + public abstract void defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable); + + //================================================================================================================== + + @Override + public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, + int layerIndex, INDArray layerParamsView, boolean initializeParams) { + SameDiffLayer ret = new SameDiffLayer(conf); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setConf(conf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return SameDiffParamInitializer.getInstance(); + } + @Override public double getL1ByParam(String paramName) { return (initializer().isWeightParam(this, paramName) ? l1 : l1Bias); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java new file mode 100644 index 000000000000..662a01273209 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java @@ -0,0 +1,126 @@ +package org.deeplearning4j.nn.layers.samediff; + +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; +import org.deeplearning4j.nn.gradient.DefaultGradient; +import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.layers.AbstractLayer; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.api.memory.MemoryWorkspace; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; + +import java.util.HashMap; +import java.util.Map; + +public class SameDiffLayer extends AbstractLayer { + + private static final String INPUT_KEY = "input"; + + protected SameDiff sameDiff; + protected String outputKey; + + + public SameDiffLayer(NeuralNetConfiguration conf){ + super(conf); + } + + + + @Override + public Layer clone() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isPretrainLayer() { + return false; + } + + @Override + public void clearNoiseWeightParams() { + //TODO - properly support noise weight... 
+ } + + @Override + public INDArray activate(boolean training) { + if(sameDiff == null){ + doInit(); + } + + SameDiff sd = sameDiff.getFunction(outputKey); + //Build map: + Map map = new HashMap<>(paramTable()); + map.put(INPUT_KEY, input); + + try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + return sd.execAndEndResult(); + } + } + + @Override + public INDArray preOutput(boolean training) { + return activate(training); + } + + + @Override + public Pair backpropGradient(INDArray epsilon) { + Gradient g = new DefaultGradient(); + + SameDiff sd = sameDiff.getFunction(outputKey); + INDArray dLdIn; + try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()){ + sd.execBackwards(); + for(String s : layerConf().paramKeys() ){ + INDArray pg = sd.grad(s).getArr(); + g.gradientForVariable().put(s, pg); + } + + dLdIn = sd.grad(INPUT_KEY).getArr(); + } + + return new Pair<>(g, dLdIn); + } + + @Override + public double calcL2(boolean backpropParamsOnly) { + double l2Sum = 0.0; + for (Map.Entry entry : paramTable().entrySet()) { + double l2 = conf.getL2ByParam(entry.getKey()); + if (l2 > 0) { + double norm2 = getParam(entry.getKey()).norm2Number().doubleValue(); + l2Sum += 0.5 * l2 * norm2 * norm2; + } + } + + return l2Sum; + } + + @Override + public double calcL1(boolean backpropParamsOnly) { + double l1Sum = 0.0; + for (Map.Entry entry : paramTable().entrySet()) { + double l1 = conf.getL1ByParam(entry.getKey()); + if (l1 > 0) { + double norm1 = getParam(entry.getKey()).norm1Number().doubleValue(); + l1Sum += l1 * norm1; + } + } + + return l1Sum; + } + + protected void doInit(){ + sameDiff = SameDiff.create(); + Map p = paramTable(); + + int[] inputShape = input.shape().clone(); + inputShape[0] = -1; + SDVariable inputVar = sameDiff.var(INPUT_KEY, inputShape); //TODO WHAT ABOUT VARIABLE SIZES? 
+ layerConf().defineLayer(sameDiff, inputVar, p); + } +} From 8d02e3a7c2a4d4b8876fdc832bd14810dad109fe Mon Sep 17 00:00:00 2001 From: Alex Black Date: Mon, 18 Dec 2017 14:26:28 +1100 Subject: [PATCH 03/34] Parameter initialization --- .../layers/samediff/BaseSameDiffLayer.java | 4 +- .../nn/layers/samediff/SameDiffLayer.java | 9 ++- .../nn/params/SameDiffParamInitializer.java | 56 ++++++++++++++++--- 3 files changed, 58 insertions(+), 11 deletions(-) diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java index 91ee375b1213..bc8f89ef419a 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java @@ -54,7 +54,9 @@ protected BaseSameDiffLayer(Builder builder){ public abstract List biasKeys(); - public abstract void defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable); + public abstract Map paramShapes(); + + public abstract void defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable); //================================================================================================================== diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java index 662a01273209..717c537bbbaf 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java @@ -14,6 +14,7 @@ import org.nd4j.linalg.primitives.Pair; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.Map; public class SameDiffLayer extends AbstractLayer { @@ -121,6 +122,12 @@ protected void doInit(){ int[] inputShape = input.shape().clone(); inputShape[0] = -1; SDVariable inputVar = sameDiff.var(INPUT_KEY, inputShape); //TODO WHAT ABOUT VARIABLE SIZES? 
- layerConf().defineLayer(sameDiff, inputVar, p); + Map paramShapes = layerConf().paramShapes(); + Map params = new LinkedHashMap<>(); + for(String s : layerConf().paramKeys()){ + int[] ps = paramShapes.get(s); + params.put(s, sameDiff.var(s, ps)); + } + layerConf().defineLayer(sameDiff, inputVar, params); } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java index b42d9fb1cd78..65409373d135 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java @@ -3,11 +3,18 @@ import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.util.ArrayUtil; +import java.util.Arrays; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import static org.nd4j.linalg.indexing.NDArrayIndex.interval; +import static org.nd4j.linalg.indexing.NDArrayIndex.point; + public class SameDiffParamInitializer implements ParamInitializer { private static final SameDiffParamInitializer INSTANCE = new SameDiffParamInitializer(); @@ -18,46 +25,77 @@ public static SameDiffParamInitializer getInstance() { @Override public int numParams(NeuralNetConfiguration conf) { - return 0; + return numParams(conf.getLayer()); } @Override public int numParams(Layer layer) { - return 0; + BaseSameDiffLayer sd = (BaseSameDiffLayer)layer; + Map m = sd.paramShapes(); + int n = 0; + for(int[] arr : m.values()){ + n += ArrayUtil.prod(arr); + } + return n; } @Override public List paramKeys(Layer layer) { - return null; + BaseSameDiffLayer sd = (BaseSameDiffLayer)layer; + return sd.paramKeys(); } @Override public List weightKeys(Layer layer) { - return null; + BaseSameDiffLayer sd = (BaseSameDiffLayer)layer; + return sd.weightKeys(); } @Override public List biasKeys(Layer layer) { - return null; + BaseSameDiffLayer sd = (BaseSameDiffLayer)layer; + return sd.biasKeys(); } @Override public boolean isWeightParam(Layer layer, String key) { - return false; + return weightKeys(layer).contains(key); } @Override public boolean isBiasParam(Layer layer, String key) { - return false; + return biasKeys(layer).contains(key); } @Override public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - return null; + BaseSameDiffLayer sd = (BaseSameDiffLayer) conf.getLayer(); + Map out = subsetAndReshape(sd.paramKeys(), sd.paramShapes(), paramsView); + if(initializeParams){ + //TODO + throw new RuntimeException("Parameter initialization not yet implemented"); + } + return out; } @Override public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { - return null; + BaseSameDiffLayer sd = (BaseSameDiffLayer) conf.getLayer(); + return subsetAndReshape(sd.paramKeys(), sd.paramShapes(), gradientView); + } + + private Map subsetAndReshape(List params, Map paramShapes, INDArray view){ + Map out = new LinkedHashMap<>(); + int soFar = 0; + for(String s : params){ + int[] sh = paramShapes.get(s); + int length = ArrayUtil.prod(sh); + INDArray sub = view.get(point(0), interval(soFar, soFar + length)); + if(!Arrays.equals(sub.shape(), sh)){ + sub = sub.reshape('c', sh); 
//TODO initialization order + } + out.put(s, sub); + } + return out; } } From c68eda1a493ae1285f26fb42b49f80ccaafb06ca Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 20 Dec 2017 11:02:44 +1100 Subject: [PATCH 04/34] Temp --- .../deeplearning4j/samediff/SameDiffTest1.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java index b5e8add908aa..9bf62b0fdf59 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java @@ -1,13 +1,13 @@ package org.deeplearning4j.samediff; import org.junit.Test; -import org.nd4j.autodiff.samediff.SDGraph; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; +import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -26,8 +26,8 @@ public void test1() { SDVariable z = mmul.add("z", bias); SDVariable out = sd.sigmoid("out", z); - SDGraph g = sd.graph(); - System.out.println(g); +// SDGraph g = sd.graph(); +// System.out.println(g); System.out.println(out); @@ -63,8 +63,8 @@ public void test2() { SDVariable z = mmul.add("z", bias); SDVariable out = sd.sigmoid("out", z); - SDGraph g = sd.graph(); - System.out.println(g); +// SDGraph g = sd.graph(); +// System.out.println(g); System.out.println(out); @@ -140,6 +140,11 @@ public void test4() { INDArray outArr = out.eval(); + INDArray exp = iInput.mmul(iWeights).addiRowVector(iBias); + System.out.println(outArr); + System.out.println(Arrays.toString(outArr.dup().data().asFloat())); + System.out.println("Expected:"); + System.out.println(exp); } } From 6a0817332485d5d17396d0bced8da0e726c18959 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 20 Dec 2017 20:01:50 +1100 Subject: [PATCH 05/34] More tests and implementation --- .../deeplearning4j/samediff/SameDiffTest.java | 76 ++++++++++++++ .../samediff/SameDiffTest1.java | 85 +++++++++++++++- .../samediff/testlayers/SameDiffDense.java | 98 +++++++++++++++++++ .../nn/layers/samediff/SameDiffLayer.java | 63 ++++++++++++ .../nn/params/SameDiffParamInitializer.java | 4 +- 5 files changed, 321 insertions(+), 5 deletions(-) create mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java create mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java new file mode 100644 index 000000000000..b7daccb85de5 --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java @@ -0,0 +1,76 @@ +package org.deeplearning4j.samediff; + +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.nn.params.DefaultParamInitializer; +import org.deeplearning4j.samediff.testlayers.SameDiffDense; +import org.junit.Test; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.ndarray.INDArray; + 
+import java.util.Map; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +public class SameDiffTest { + + @Test + public void testSameDiffDenseBasic(){ + + int nIn = 3; + int nOut = 4; + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + Map pt1 = net.getLayer(0).paramTable(); + assertNotNull(pt1); + assertEquals(2, pt1.size()); + assertNotNull(pt1.get(DefaultParamInitializer.WEIGHT_KEY)); + assertNotNull(pt1.get(DefaultParamInitializer.BIAS_KEY)); + + assertArrayEquals(new int[]{nIn, nOut}, pt1.get(DefaultParamInitializer.WEIGHT_KEY).shape()); + assertArrayEquals(new int[]{1, nOut}, pt1.get(DefaultParamInitializer.BIAS_KEY).shape()); + } + + @Test + public void testSameDiffDenseForward(){ + + int nIn = 3; + int nOut = 4; + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + Map pt1 = net.paramTable(); + assertNotNull(pt1); + + System.out.println(pt1); + +// MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() +// .list() +// .layer(new DenseLayer.Builder().activation(Activation.SIGMOID).nIn(nIn).nOut(nOut).build()) +// .build(); +// +// MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); +// net2.init(); + + + + } + +} diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java index 9bf62b0fdf59..ed75f28bf5a7 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java @@ -1,15 +1,21 @@ package org.deeplearning4j.samediff; import org.junit.Test; +import org.nd4j.autodiff.functions.DifferentialFunction; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; public class SameDiffTest1 { @@ -122,12 +128,13 @@ public void test3() { @Test public void test4() { + Nd4j.getRandom().setSeed(12345); SameDiff sd = SameDiff.create(); INDArray iInput = Nd4j.rand(3,4); INDArray iWeights = Nd4j.rand(4,5); - INDArray iBias = Nd4j.rand(1,5); + INDArray iBias = Nd4j.zeros(1, 5); //Nd4j.rand(1,5); SDVariable input = sd.var("input", iInput); SDVariable weights = sd.var("weights", iWeights); @@ -138,13 +145,83 @@ public void test4() { SDVariable out = sd.sigmoid("out", z); - INDArray outArr = out.eval(); +// INDArray outArr = out.eval(); + Pair, List> m = sd.exec(); + + for(Map.Entry e : m.getFirst().entrySet()){ + System.out.println(e.getKey().getVarName()); + System.out.println(e.getKey().getArr()); + } + + System.out.println("------------\nAll variable values"); + + List variables = sd.variables(); + for(SDVariable s : variables){ + System.out.println(s.getVarName()); + System.out.println(s.getArr()); + } + + 
System.out.println("------------"); INDArray exp = iInput.mmul(iWeights).addiRowVector(iBias); - System.out.println(outArr); - System.out.println(Arrays.toString(outArr.dup().data().asFloat())); + System.out.println("Input:"); + System.out.println(iInput); + System.out.println("Weights:"); + System.out.println(iWeights); + System.out.println("Bias:"); + System.out.println(iBias); + + System.out.println("------------"); + System.out.println("Expected:"); System.out.println(exp); + System.out.println("Actual:"); +// System.out.println(outArr); +// System.out.println(Arrays.toString(outArr.dup().data().asFloat())); + } + + + @Test + public void test5() { + Nd4j.getRandom().setSeed(12345); + + SameDiff sd = SameDiff.create(); + + INDArray iInput = Nd4j.rand(3,4); + INDArray iWeights = Nd4j.rand(4,5); + INDArray iBias = Nd4j.rand(1,5); + + SDVariable input = sd.var("input", iInput); + SDVariable weights = sd.var("weights", iWeights); + SDVariable bias = sd.var("bias", iBias); + + SDVariable mmul = sd.mmul("mmul", input, weights); + SDVariable z = mmul.add("z", bias); + SDVariable out = sd.sigmoid("out", z); + + System.out.println("------------\nAll variable values"); + + sd.exec(); + + List variables = sd.variables(); + for(SDVariable s : variables){ + System.out.println(s.getVarName()); + System.out.println(s.getArr()); + System.out.println("Data buffer: " + Arrays.toString(s.getArr().data().asFloat())); + } + + System.out.println("------------"); + + List varNames = variables.stream().map(SDVariable::getVarName).collect(Collectors.toList()); + System.out.println("VarNames: " + varNames); //"z" and "out" appear twice + + INDArray expMmul = iInput.mmul(iWeights); + INDArray expZ = expMmul.addRowVector(iBias); + INDArray expOut = Transforms.sigmoid(expZ, true); + + assertEquals(expMmul, mmul.getArr()); + assertEquals(expZ, z.getArr()); + assertEquals(expOut, out.getArr()); } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java new file mode 100644 index 000000000000..7055d277b44a --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java @@ -0,0 +1,98 @@ +package org.deeplearning4j.samediff.testlayers; + +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; +import org.deeplearning4j.nn.params.DefaultParamInitializer; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.activations.IActivation; + +import java.util.*; + +public class SameDiffDense extends BaseSameDiffLayer { + + private static final List W_KEYS = Collections.singletonList(DefaultParamInitializer.WEIGHT_KEY); + private static final List B_KEYS = Collections.singletonList(DefaultParamInitializer.BIAS_KEY); + private static final List PARAM_KEYS = Arrays.asList(DefaultParamInitializer.WEIGHT_KEY, DefaultParamInitializer.BIAS_KEY); + + private final Map paramShapes; + + private int nIn; + private int nOut; + + protected SameDiffDense(Builder builder) { + super(builder); + + nIn = builder.nIn; + nOut = builder.nOut; + + paramShapes = new HashMap<>(); + paramShapes.put(DefaultParamInitializer.WEIGHT_KEY, new int[]{nIn, nOut}); + paramShapes.put(DefaultParamInitializer.BIAS_KEY, new int[]{1, nOut}); + } + + 
@Override + public InputType getOutputType(int layerIndex, InputType inputType) { + return null; + } + + @Override + public void setNIn(InputType inputType, boolean override) { + if(override){ + this.nIn = ((InputType.InputTypeFeedForward)inputType).getSize(); + } + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return null; + } + + @Override + public List weightKeys() { + return W_KEYS; + } + + @Override + public List biasKeys() { + return B_KEYS; + } + + @Override + public Map paramShapes() { + return paramShapes; + } + + @Override + public void defineLayer(SameDiff sd, SDVariable layerInput, Map paramTable) { + SDVariable weights = paramTable.get(DefaultParamInitializer.WEIGHT_KEY); + SDVariable bias = paramTable.get(DefaultParamInitializer.BIAS_KEY); + + SDVariable mmul = sd.mmul("mmul", layerInput, weights); + SDVariable z = mmul.add("z", bias); + SDVariable out = sd.sigmoid("out", z); + } + + public static class Builder extends BaseSameDiffLayer.Builder { + + private int nIn; + private int nOut; + + public Builder nIn(int nIn){ + this.nIn = nIn; + return this; + } + + public Builder nOut(int nOut){ + this.nOut = nOut; + return this; + } + + @Override + public SameDiffDense build() { + return new SameDiffDense(this); + } + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java index 717c537bbbaf..f40e64ee2ca4 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java @@ -24,6 +24,10 @@ public class SameDiffLayer extends AbstractLayer { protected SameDiff sameDiff; protected String outputKey; + protected INDArray params; + protected INDArray gradients; + protected Map paramTable; + public SameDiffLayer(NeuralNetConfiguration conf){ super(conf); @@ -115,6 +119,65 @@ public double calcL1(boolean backpropParamsOnly) { return l1Sum; } + /**Returns the parameters of the neural network as a flattened row vector + * @return the parameters of the neural network + */ + @Override + public INDArray params() { + return params; + } + + @Override + public INDArray getParam(String param) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public void setParam(String key, INDArray val) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public void setParams(INDArray params) { + if (params != null) { + throw new UnsupportedOperationException("Not supported"); + } + } + + protected void setParams(INDArray params, char order) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public void setParamsViewArray(INDArray params) { + this.params = params; + } + + @Override + public INDArray getGradientsViewArray() { + return params; + } + + @Override + public void setBackpropGradientsViewArray(INDArray gradients) { + this.gradients = gradients; + } + + @Override + public void setParamTable(Map paramTable) { + this.paramTable = paramTable; + } + + @Override + public Map paramTable() { + return paramTable(false); + } + + @Override + public Map paramTable(boolean backpropParamsOnly) { + return paramTable; + } + protected void doInit(){ sameDiff = SameDiff.create(); Map p = paramTable(); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java 
b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java index 65409373d135..aedafeec927a 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java @@ -1,5 +1,6 @@ package org.deeplearning4j.nn.params; +import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.Layer; @@ -15,6 +16,7 @@ import static org.nd4j.linalg.indexing.NDArrayIndex.interval; import static org.nd4j.linalg.indexing.NDArrayIndex.point; +@Slf4j public class SameDiffParamInitializer implements ParamInitializer { private static final SameDiffParamInitializer INSTANCE = new SameDiffParamInitializer(); @@ -73,7 +75,7 @@ public Map init(NeuralNetConfiguration conf, INDArray paramsVi Map out = subsetAndReshape(sd.paramKeys(), sd.paramShapes(), paramsView); if(initializeParams){ //TODO - throw new RuntimeException("Parameter initialization not yet implemented"); + log.warn("***** SameDiffParamInitializer: Parameter initialization not yet implemented *****"); } return out; } From 84f65065e3237fbb25693c498798431ce1bac2e5 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Thu, 21 Dec 2017 11:04:20 +1100 Subject: [PATCH 06/34] More tests/implementation --- deeplearning4j-core/pom.xml | 10 ++ .../deeplearning4j/samediff/SameDiffTest.java | 151 ++++++++++++++++-- .../samediff/testlayers/SameDiffDense.java | 4 +- .../layers/samediff/BaseSameDiffLayer.java | 2 +- .../nn/layers/samediff/SameDiffLayer.java | 38 +++-- 5 files changed, 179 insertions(+), 26 deletions(-) diff --git a/deeplearning4j-core/pom.xml b/deeplearning4j-core/pom.xml index 7ffe676c4ceb..ffefafda17d3 100644 --- a/deeplearning4j-core/pom.xml +++ b/deeplearning4j-core/pom.xml @@ -36,6 +36,16 @@ + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.8 + 1.8 + + + diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java index b7daccb85de5..29de92194637 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java @@ -1,5 +1,6 @@ package org.deeplearning4j.samediff; +import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -7,19 +8,24 @@ import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.samediff.testlayers.SameDiffDense; import org.junit.Test; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; +import java.util.HashMap; import java.util.Map; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +@Slf4j public class SameDiffTest { @Test - public void testSameDiffDenseBasic(){ + public void testSameDiffDenseBasic() { int nIn = 3; int nOut = 4; @@ -32,7 +38,7 @@ public void testSameDiffDenseBasic(){ MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - Map pt1 = net.getLayer(0).paramTable(); + 
Map pt1 = net.getLayer(0).paramTable(); assertNotNull(pt1); assertEquals(2, pt1.size()); assertNotNull(pt1.get(DefaultParamInitializer.WEIGHT_KEY)); @@ -43,8 +49,9 @@ public void testSameDiffDenseBasic(){ } @Test - public void testSameDiffDenseForward(){ + public void testSameDiffDenseForward() { + int minibatch = 5; int nIn = 3; int nOut = 4; @@ -56,21 +63,143 @@ public void testSameDiffDenseForward(){ MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - Map pt1 = net.paramTable(); + Map pt1 = net.paramTable(); assertNotNull(pt1); System.out.println(pt1); -// MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() -// .list() -// .layer(new DenseLayer.Builder().activation(Activation.SIGMOID).nIn(nIn).nOut(nOut).build()) -// .build(); -// -// MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); -// net2.init(); + MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + .list() + .layer(new DenseLayer.Builder().activation(Activation.SIGMOID).nIn(nIn).nOut(nOut).build()) + .build(); + + MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); + net2.init(); + net.params().assign(net2.params()); + INDArray in = Nd4j.rand(minibatch, nIn); + INDArray out = net.output(in); + INDArray outExp = net2.output(in); + assertEquals(outExp, out); } + @Test + public void testShapeResolutionMinus1() { + + int nIn = 3; + int nOut = 4; + + int minibatch = 3; + +// for(boolean useMinus1 : new boolean[]{false, true}) { + for (boolean useMinus1 : new boolean[]{true}) { + log.info("Starting: {}", (useMinus1 ? "minibatch -1" : "minibatch 3")); + + int[] inShape; + if (useMinus1) { + inShape = new int[]{-1, nIn}; + } else { + inShape = new int[]{minibatch, nIn}; + } + int[] wShape = new int[]{nIn, nOut}; + int[] bShape = new int[]{1, nOut}; + + SameDiff sd = SameDiff.create(); + SDVariable layerInput = sd.var("in", inShape); + SDVariable weights = sd.var("W", wShape); + SDVariable bias = sd.var("b", bShape); + + SDVariable mmul = sd.mmul("mmul", layerInput, weights); + SDVariable z = mmul.add("z", bias); + SDVariable out = sd.sigmoid("out", z); + + INDArray in = Nd4j.rand(new int[]{minibatch, nIn}); + INDArray w = Nd4j.rand(wShape); + INDArray b = Nd4j.rand(bShape); + + Map m = new HashMap<>(); + m.put("in", in); + m.put("W", w); + m.put("b", b); + + sd.associateArrayWithVariable(in, sd.getVariable("in")); + sd.associateArrayWithVariable(w, sd.getVariable("W")); + sd.associateArrayWithVariable(b, sd.getVariable("b")); + +// INDArray outArr = sd.execAndEndResult(); + + sd.addAsPlaceHolder("in"); + sd.addAsPlaceHolder("W"); + sd.addAsPlaceHolder("b"); + + sd.execWithPlaceHolder(m); + + INDArray outArr = sd.getVariable("out").getArr(); + + assertArrayEquals(new int[]{minibatch, nOut}, outArr.shape()); + } + } + + @Test + public void debug() { + + int nIn = 3; + int nOut = 4; + + int minibatch = 3; + + int[] inShape = new int[]{-1, nIn}; + int[] wShape = new int[]{nIn, nOut}; + int[] bShape = new int[]{1, nOut}; + + SameDiff sd = SameDiff.create(); + SDVariable layerInput = sd.var("in", inShape); + SDVariable weights = sd.var("W", wShape); + SDVariable bias = sd.var("b", bShape); + + assertArrayEquals(inShape, layerInput.getShape()); + assertArrayEquals(wShape, weights.getShape()); + + SDVariable mmul = sd.mmul("mmul", layerInput, weights); + SDVariable z = mmul.add("z", bias); + SDVariable out = sd.sigmoid("out", z); + + INDArray in = Nd4j.rand(new int[]{minibatch, nIn}); + INDArray w = Nd4j.rand(wShape); + INDArray b = Nd4j.rand(bShape); + + Map m = new HashMap<>(); + 
m.put("in", in); + m.put("W", w); + m.put("b", b); + + sd.associateArrayWithVariable(in, sd.getVariable("in")); + sd.associateArrayWithVariable(w, sd.getVariable("W")); + sd.associateArrayWithVariable(b, sd.getVariable("b")); + +// INDArray outArr = sd.execAndEndResult(); + + sd.addAsPlaceHolder("in"); + sd.addAsPlaceHolder("W"); + sd.addAsPlaceHolder("b"); + + sd.execWithPlaceHolder(m); + + INDArray outArr = sd.getVariable("out").getArr(); + + assertArrayEquals(new int[]{minibatch, nOut}, outArr.shape()); + } + + @Test + public void debug2() { + int[] inShape = new int[]{-1, 3}; + + SameDiff sd = SameDiff.create(); + SDVariable layerInput = sd.var("in", inShape); + + int[] actShape = layerInput.getShape(); //Getting: [1,3] + assertArrayEquals(inShape, actShape); + } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java index 7055d277b44a..56b00d66ac67 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java @@ -66,13 +66,15 @@ public Map paramShapes() { } @Override - public void defineLayer(SameDiff sd, SDVariable layerInput, Map paramTable) { + public List defineLayer(SameDiff sd, SDVariable layerInput, Map paramTable) { SDVariable weights = paramTable.get(DefaultParamInitializer.WEIGHT_KEY); SDVariable bias = paramTable.get(DefaultParamInitializer.BIAS_KEY); SDVariable mmul = sd.mmul("mmul", layerInput, weights); SDVariable z = mmul.add("z", bias); SDVariable out = sd.sigmoid("out", z); + + return Collections.singletonList("out"); } public static class Builder extends BaseSameDiffLayer.Builder { diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java index bc8f89ef419a..1e181ae62b38 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java @@ -56,7 +56,7 @@ protected BaseSameDiffLayer(Builder builder){ public abstract Map paramShapes(); - public abstract void defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable); + public abstract List defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable); //================================================================================================================== diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java index f40e64ee2ca4..bcd64b1fed48 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java @@ -15,6 +15,7 @@ import java.util.HashMap; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; public class SameDiffLayer extends AbstractLayer { @@ -22,7 +23,7 @@ public class SameDiffLayer extends AbstractLayer { private static final String INPUT_KEY = "input"; protected SameDiff sameDiff; - protected String outputKey; + protected List outputKeys; protected INDArray params; protected INDArray gradients; @@ -56,13 
+57,15 @@ public INDArray activate(boolean training) { doInit(); } - SameDiff sd = sameDiff.getFunction(outputKey); //Build map: - Map map = new HashMap<>(paramTable()); - map.put(INPUT_KEY, input); +// Map map = new HashMap<>(paramTable()); +// map.put(INPUT_KEY, input); + + sameDiff.associateArrayWithVariable(input, sameDiff.getVariable(INPUT_KEY)); try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - return sd.execAndEndResult(); + INDArray result = sameDiff.execAndEndResult(); + return result; } } @@ -76,16 +79,15 @@ public INDArray preOutput(boolean training) { public Pair backpropGradient(INDArray epsilon) { Gradient g = new DefaultGradient(); - SameDiff sd = sameDiff.getFunction(outputKey); INDArray dLdIn; try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()){ - sd.execBackwards(); + sameDiff.execBackwards(); for(String s : layerConf().paramKeys() ){ - INDArray pg = sd.grad(s).getArr(); + INDArray pg = sameDiff.grad(s).getArr(); g.gradientForVariable().put(s, pg); } - dLdIn = sd.grad(INPUT_KEY).getArr(); + dLdIn = sameDiff.grad(INPUT_KEY).getArr(); } return new Pair<>(g, dLdIn); @@ -183,14 +185,24 @@ protected void doInit(){ Map p = paramTable(); int[] inputShape = input.shape().clone(); - inputShape[0] = -1; - SDVariable inputVar = sameDiff.var(INPUT_KEY, inputShape); //TODO WHAT ABOUT VARIABLE SIZES? +// inputShape[0] = -1; //TODO THIS DOESN'T ENABLE VARIABLE SIZE MINIBATCHES + SDVariable inputVar = sameDiff.var(INPUT_KEY, inputShape); Map paramShapes = layerConf().paramShapes(); Map params = new LinkedHashMap<>(); for(String s : layerConf().paramKeys()){ int[] ps = paramShapes.get(s); - params.put(s, sameDiff.var(s, ps)); + SDVariable v = sameDiff.var(s, ps); + params.put(s, v); + } + List outputKeys = layerConf().defineLayer(sameDiff, inputVar, params); + if(outputKeys == null || outputKeys.size() != 1){ + throw new IllegalStateException("Invalid output keys: " + outputKeys); } - layerConf().defineLayer(sameDiff, inputVar, params); + + for(Map.Entry e : p.entrySet()){ + sameDiff.associateArrayWithVariable(e.getValue(), sameDiff.getVariable(e.getKey())); + } + + this.outputKeys = outputKeys; } } From c6e4d2ceb1c5a2e0e494b3d72f83ae95f5cee03f Mon Sep 17 00:00:00 2001 From: Alex Black Date: Thu, 21 Dec 2017 14:40:05 +1100 Subject: [PATCH 07/34] More tests, add activation fn config to SameDiffDense --- .../java/org/deeplearning4j/TestUtils.java | 16 ++ .../deeplearning4j/samediff/SameDiffTest.java | 236 ++++++++++++++++-- .../samediff/testlayers/SameDiffDense.java | 12 +- .../nn/params/SameDiffParamInitializer.java | 5 +- 4 files changed, 244 insertions(+), 25 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/TestUtils.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/TestUtils.java index 41d972320552..2dd8e1b982f8 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/TestUtils.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/TestUtils.java @@ -55,6 +55,22 @@ public static ComputationGraph testModelSerialization(ComputationGraph net){ } } + public static INDArray randomOneHot(int examples, int nOut){ + return randomOneHot(examples, nOut, new Random(12345)); + } + + public static INDArray randomOneHot(int examples, int nOut, long rngSeed){ + return randomOneHot(examples, nOut, new Random(rngSeed)); + } + + public static INDArray randomOneHot(int examples, int nOut, Random rng){ + INDArray arr = Nd4j.create(examples, nOut); + for( int i=0; i pt1 = net.paramTable(); - 
assertNotNull(pt1); - - System.out.println(pt1); + Activation[] afns = new Activation[]{ + Activation.TANH, Activation.SIGMOID, + Activation.ELU, Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, +// Activation.CUBE, //Output differs +// Activation.HARDTANH, //NPE +// Activation.RELU //JVM crash + }; + + for(Activation a : afns) { + log.info("Starting test - " + a); + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) + .activation(a) + .build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + assertNotNull(net.paramTable()); + + MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + .list() + .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) + .build(); + + MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); + net2.init(); + + net.params().assign(net2.params()); + + //Check params: + assertEquals(net2.params(), net.params()); + Map params1 = net.paramTable(); + Map params2 = net2.paramTable(); + assertEquals(params2, params1); + + INDArray in = Nd4j.rand(minibatch, nIn); + INDArray out = net.output(in); + INDArray outExp = net2.output(in); + + assertEquals(outExp, out); + } + } - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() - .list() - .layer(new DenseLayer.Builder().activation(Activation.SIGMOID).nIn(nIn).nOut(nOut).build()) - .build(); + @Test + public void testSameDiffDenseBackward() { - MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); - net2.init(); + int minibatch = 5; + int nIn = 3; + int nOut = 4; - net.params().assign(net2.params()); + Activation[] afns = new Activation[]{ + Activation.TANH, Activation.SIGMOID, + Activation.ELU, Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, + Activation.CUBE, //Output differs + Activation.HARDTANH, //NPE + Activation.RELU //JVM crash + }; + + for(Activation a : afns) { + log.info("Starting test - " + a); + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) + .activation(a) + .build()) + .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) + .lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + .list() + .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) + .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) + .lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .build(); + + MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); + net2.init(); + + net.params().assign(net2.params()); + + //Check params: + assertEquals(net2.params(), net.params()); + assertEquals(net2.paramTable(), net.paramTable()); + + INDArray in = Nd4j.rand(minibatch, nIn); + INDArray l = TestUtils.randomOneHot(minibatch, nOut, 12345); + net.setInput(in); + net2.setInput(in); + net.setLabels(l); + net2.setLabels(l); + + net.computeGradientAndScore(); + net2.computeGradientAndScore(); + + Gradient g = net.gradient(); + Gradient g2 = net.gradient(); + assertEquals(g2.gradient(), g.gradient()); - INDArray in = Nd4j.rand(minibatch, nIn); - INDArray out = net.output(in); - INDArray outExp = net2.output(in); - assertEquals(outExp, out); + } } @Test @@ -202,4 +286,110 @@ public void debug2() { int[] actShape = 
layerInput.getShape(); //Getting: [1,3] assertArrayEquals(inShape, actShape); } + + @Test + public void debugTransforms() { + + Activation[] afns = new Activation[]{ + //First 6 pass + Activation.TANH, Activation.SIGMOID, + Activation.ELU, Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, + //Next 3 fail + Activation.CUBE, //Output differs + Activation.HARDTANH, //NPE + Activation.RELU //JVM crash + }; + + int nIn = 3; + int nOut = 4; + + int minibatch = 3; + + int[] inShape = new int[]{minibatch, nIn}; + int[] wShape = new int[]{nIn, nOut}; + int[] bShape = new int[]{1, nOut}; + + for( Activation a : afns ){ + log.info("Starting: " + a); + SameDiff sd = SameDiff.create(); + SDVariable layerInput = sd.var("in", inShape); + SDVariable weights = sd.var("W", wShape); + SDVariable bias = sd.var("b", bShape); + + SDVariable mmul = sd.mmul("mmul", layerInput, weights); + SDVariable z = mmul.add("z", bias); + + INDArray in = Nd4j.rand(new int[]{minibatch, nIn}); + INDArray w = Nd4j.rand(wShape); + INDArray b = Nd4j.rand(bShape); + + INDArray exp = in.mmul(w).addiRowVector(b); + + SDVariable out = asSameDiff(a, "out", sd, z, exp); + + Map m = new HashMap<>(); + m.put("in", in); + m.put("W", w); + m.put("b", b); + + sd.associateArrayWithVariable(in, sd.getVariable("in")); + sd.associateArrayWithVariable(w, sd.getVariable("W")); + sd.associateArrayWithVariable(b, sd.getVariable("b")); + + sd.addAsPlaceHolder("in"); + sd.addAsPlaceHolder("W"); + sd.addAsPlaceHolder("b"); + + sd.execWithPlaceHolder(m); + + INDArray outArr = sd.getVariable("out").getArr(); + + assertEquals(exp, outArr); + } + } + + public static SDVariable asSameDiff(Activation a, String variableName, SameDiff sd, SDVariable input, INDArray input2){ + switch (a){ + case CUBE: + Transforms.pow(input2, 3, false); + return sd.pow(variableName, input, 3.0); + case ELU: + Transforms.elu(input2, false); + return sd.elu(variableName, input); + case HARDTANH: + Transforms.hardTanh(input2, false); + return sd.hardTanh(variableName, input); + case IDENTITY: + return input.add(variableName, 0.0); //Hack to get new variable with same content + case LEAKYRELU: + Transforms.leakyRelu(input2, false); + return sd.leakyRelu(variableName, input, 0.0); + case RELU: + Transforms.relu(input2, false); + return sd.relu(variableName, input, 0.0); + case SIGMOID: + Transforms.sigmoid(input2, false); + return sd.sigmoid(variableName, input); + case SOFTMAX: + Transforms.softmax(input2, false); + return sd.softmax(variableName, input); + case SOFTPLUS: + Transforms.softPlus(input2, false); + return sd.softplus(variableName, input); + case SOFTSIGN: + Transforms.softsign(input2, false); + return sd.softsign(variableName, input); + case TANH: + Transforms.tanh(input2, false); + return sd.tanh(variableName, input); + case HARDSIGMOID: + case RATIONALTANH: + case RRELU: + case RECTIFIEDTANH: + case SELU: + case SWISH: + default: + throw new UnsupportedOperationException("Activation function not yet supported: " + a); + } + } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java index 56b00d66ac67..496686c40898 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java @@ -7,6 +7,7 @@ import org.deeplearning4j.nn.params.DefaultParamInitializer; import 
org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import java.util.*; @@ -21,12 +22,14 @@ public class SameDiffDense extends BaseSameDiffLayer { private int nIn; private int nOut; + private Activation activation; protected SameDiffDense(Builder builder) { super(builder); nIn = builder.nIn; nOut = builder.nOut; + activation = builder.activation; paramShapes = new HashMap<>(); paramShapes.put(DefaultParamInitializer.WEIGHT_KEY, new int[]{nIn, nOut}); @@ -72,7 +75,8 @@ public List defineLayer(SameDiff sd, SDVariable layerInput, Map { private int nIn; private int nOut; + private Activation activation = Activation.TANH; public Builder nIn(int nIn){ this.nIn = nIn; @@ -92,6 +97,11 @@ public Builder nOut(int nOut){ return this; } + public Builder activation(Activation activation){ + this.activation = activation; + return this; + } + @Override public SameDiffDense build() { return new SameDiffDense(this); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java index aedafeec927a..01dea6792bc1 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java @@ -5,6 +5,7 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.Layer; import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; +import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.util.ArrayUtil; @@ -94,9 +95,11 @@ private Map subsetAndReshape(List params, Map Date: Thu, 21 Dec 2017 20:41:02 +1100 Subject: [PATCH 08/34] First steps on samediff conv layer --- .../samediff/SameDiffGradTest.java | 64 +++++ .../deeplearning4j/samediff/SameDiffTest.java | 231 +++++++++++------- .../samediff/testlayers/SameDiffConv.java | 135 ++++++++++ .../nn/params/SameDiffParamInitializer.java | 5 + 4 files changed, 344 insertions(+), 91 deletions(-) create mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffGradTest.java create mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffGradTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffGradTest.java new file mode 100644 index 000000000000..34bcc022b73d --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffGradTest.java @@ -0,0 +1,64 @@ +package org.deeplearning4j.samediff; + +import org.junit.Test; +import org.nd4j.autodiff.functions.DifferentialFunction; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +public class SameDiffGradTest { + + @Test + public void test1(){ + Nd4j.getRandom().setSeed(12345); + INDArray inArr = Nd4j.rand(1,4); + + SameDiff sd = SameDiff.create(); + SDVariable in = sd.var("in", inArr); + SDVariable 
s = sd.tanh("s", in); + + INDArray out = sd.execAndEndResult(); + INDArray outEx = Transforms.tanh(inArr, true); + + assertEquals(outEx, out); + System.out.println(out); + + System.out.println("------------------"); + + List vs = sd.variables(); + for(SDVariable sdv : vs){ +// if(sdv.getVarName().equals("in")){ +// System.out.println(sdv.getVarName() + "\n" + sdv.getArr()); +// } else { +// System.out.println(sdv.getVarName() + " - inputs: " + Arrays.toString(sd.getInputsForFunction(sdv)) + "\n" + sdv.getArr()); +// } + System.out.println(sdv.getVarName() + "\n" + sdv.getArr()); + } + + System.out.println("------------------"); + + Pair,List> p = sd.execBackwards(); + + System.out.println("------------------"); + + System.out.println("GRAD variables:"); + SameDiff grad = sd.getFunction("grad"); + for(SDVariable sdv : grad.variables()){ + System.out.println(sdv.getVarName() + " - inputs: " + Arrays.toString(sd.getInputsForFunction(sdv)) + "\n" + sdv.getArr()); + } + + System.out.println("------------------"); + + + } + +} diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java index 73ca2c3fff94..19e97224b2b2 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java @@ -14,6 +14,7 @@ import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.blas.params.MMulTranspose; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.LossFunctions; @@ -56,116 +57,118 @@ public void testSameDiffDenseBasic() { @Test public void testSameDiffDenseForward() { - int minibatch = 5; - int nIn = 3; - int nOut = 4; + for (int minibatch : new int[]{5, 1}) { + int nIn = 3; + int nOut = 4; - Activation[] afns = new Activation[]{ - Activation.TANH, Activation.SIGMOID, - Activation.ELU, Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, + Activation[] afns = new Activation[]{ + Activation.TANH, Activation.SIGMOID, + Activation.ELU, Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, // Activation.CUBE, //Output differs // Activation.HARDTANH, //NPE // Activation.RELU //JVM crash - }; + }; - for(Activation a : afns) { - log.info("Starting test - " + a); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() - .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) - .activation(a) - .build()) - .build(); + for (Activation a : afns) { + log.info("Starting test - " + a); + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) + .activation(a) + .build()) + .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); - assertNotNull(net.paramTable()); + assertNotNull(net.paramTable()); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() - .list() - .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) - .build(); + MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + .list() + .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) + .build(); - MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); - net2.init(); + 
MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); + net2.init(); - net.params().assign(net2.params()); + net.params().assign(net2.params()); - //Check params: - assertEquals(net2.params(), net.params()); - Map params1 = net.paramTable(); - Map params2 = net2.paramTable(); - assertEquals(params2, params1); + //Check params: + assertEquals(net2.params(), net.params()); + Map params1 = net.paramTable(); + Map params2 = net2.paramTable(); + assertEquals(params2, params1); - INDArray in = Nd4j.rand(minibatch, nIn); - INDArray out = net.output(in); - INDArray outExp = net2.output(in); + INDArray in = Nd4j.rand(minibatch, nIn); + INDArray out = net.output(in); + INDArray outExp = net2.output(in); - assertEquals(outExp, out); + assertEquals(outExp, out); + } } } @Test public void testSameDiffDenseBackward() { - int minibatch = 5; int nIn = 3; int nOut = 4; - Activation[] afns = new Activation[]{ - Activation.TANH, Activation.SIGMOID, - Activation.ELU, Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, - Activation.CUBE, //Output differs - Activation.HARDTANH, //NPE - Activation.RELU //JVM crash - }; - - for(Activation a : afns) { - log.info("Starting test - " + a); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() - .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) - .activation(a) - .build()) - .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) - .lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() - .list() - .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) - .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) - .lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .build(); - - MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); - net2.init(); - - net.params().assign(net2.params()); - - //Check params: - assertEquals(net2.params(), net.params()); - assertEquals(net2.paramTable(), net.paramTable()); - - INDArray in = Nd4j.rand(minibatch, nIn); - INDArray l = TestUtils.randomOneHot(minibatch, nOut, 12345); - net.setInput(in); - net2.setInput(in); - net.setLabels(l); - net2.setLabels(l); - - net.computeGradientAndScore(); - net2.computeGradientAndScore(); - - Gradient g = net.gradient(); - Gradient g2 = net.gradient(); - assertEquals(g2.gradient(), g.gradient()); - - + for (int minibatch : new int[]{5, 1}) { + + Activation[] afns = new Activation[]{ + Activation.TANH, +// Activation.SIGMOID, +// Activation.ELU, Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, +// Activation.CUBE, //Output differs +// Activation.HARDTANH, //NPE +// Activation.RELU //JVM crash + }; + + for (Activation a : afns) { + log.info("Starting test - " + a); + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) + .activation(a) + .build()) + .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) + .lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + .list() + .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) + .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) + 
.lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .build(); + + MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); + net2.init(); + + net.params().assign(net2.params()); + + //Check params: + assertEquals(net2.params(), net.params()); + assertEquals(net2.paramTable(), net.paramTable()); + + INDArray in = Nd4j.rand(minibatch, nIn); + INDArray l = TestUtils.randomOneHot(minibatch, nOut, 12345); + net.setInput(in); + net2.setInput(in); + net.setLabels(l); + net2.setLabels(l); + + net.computeGradientAndScore(); + net2.computeGradientAndScore(); + + Gradient g = net.gradient(); + Gradient g2 = net2.gradient(); + assertEquals(g2.gradient(), g.gradient()); + } } } @@ -309,7 +312,7 @@ public void debugTransforms() { int[] wShape = new int[]{nIn, nOut}; int[] bShape = new int[]{1, nOut}; - for( Activation a : afns ){ + for (Activation a : afns) { log.info("Starting: " + a); SameDiff sd = SameDiff.create(); SDVariable layerInput = sd.var("in", inShape); @@ -348,8 +351,8 @@ public void debugTransforms() { } } - public static SDVariable asSameDiff(Activation a, String variableName, SameDiff sd, SDVariable input, INDArray input2){ - switch (a){ + public static SDVariable asSameDiff(Activation a, String variableName, SameDiff sd, SDVariable input, INDArray input2) { + switch (a) { case CUBE: Transforms.pow(input2, 3, false); return sd.pow(variableName, input, 3.0); @@ -392,4 +395,50 @@ public static SDVariable asSameDiff(Activation a, String variableName, SameDiff throw new UnsupportedOperationException("Activation function not yet supported: " + a); } } + + + @Test + public void debugMmul() { + + INDArray first = Nd4j.linspace(1, 3, 3); + INDArray second = Nd4j.linspace(4, 7, 4); + + SameDiff sd = SameDiff.create(); + SDVariable f = sd.var("in1", first); + SDVariable s = sd.var("in2", second); + SDVariable fTranspose = sd.transpose(f); + SDVariable mmul = sd.mmul("mmul", fTranspose, s); + + INDArray out = sd.execAndEndResult(); + + INDArray exp = first.transpose().mmul(second); + assertEquals(exp, out); + } + + @Test + public void debugMmul2() { + //Here: [1,3]^T * [1,4] = [3,4] + + INDArray first = Nd4j.linspace(1, 3, 3); + INDArray second = Nd4j.linspace(4, 7, 4); + + SameDiff sd = SameDiff.create(); + SDVariable f = sd.var("in1", first); + SDVariable s = sd.var("in2", second); + + MMulTranspose mt = MMulTranspose.builder() + .transposeA(true) + .transposeB(false) + .transposeResult(false) + .a(first) + .b(second) + .build(); + SDVariable mmul = sd.f().mmul(f, s, mt); + sd.updateVariableNameAndReference(mmul, "mmul"); + + INDArray out = sd.execAndEndResult(); + + INDArray exp = first.transpose().mmul(second); + assertEquals(exp, out); + } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java new file mode 100644 index 000000000000..3df32d6a54e0 --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java @@ -0,0 +1,135 @@ +package org.deeplearning4j.samediff.testlayers; + +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; +import org.deeplearning4j.nn.conf.layers.InputTypeUtil; +import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; +import org.deeplearning4j.nn.params.ConvolutionParamInitializer; +import 
org.deeplearning4j.util.ConvolutionUtils; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.activations.Activation; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class SameDiffConv extends BaseSameDiffLayer { + + private static final List WEIGHT_KEYS = Collections.singletonList(ConvolutionParamInitializer.WEIGHT_KEY); + private static final List BIAS_KEYS = Collections.singletonList(ConvolutionParamInitializer.BIAS_KEY); + + + private int nIn; + private int nOut; + private Activation activation; + private int[] kernel; + private int[] stride; + private int[] padding; + private ConvolutionMode cm; + + protected SameDiffConv(Builder b) { + super(b); + this.nIn = b.nIn; + this.nOut = b.nOut; + this.activation = b.activation; + this.kernel = b.kernel; + this.stride = b.stride; + this.padding = b.padding; + this.cm = b.cm; + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional)inputType; + return InputTypeUtil.getOutputTypeCnnLayers(inputType, kernel, stride, padding, new int[]{1,1}, + cm, nOut, layerIndex, getLayerName(), SameDiffConv.class); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + if (nIn <= 0 || override) { + InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; + this.nIn = c.getDepth(); + } + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); + } + + @Override + public List weightKeys() { + return WEIGHT_KEYS; + } + + @Override + public List biasKeys() { + return BIAS_KEYS; + } + + @Override + public Map paramShapes() { + return null; + } + + @Override + public List defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable) { + return null; + } + + public static class Builder extends BaseSameDiffLayer.Builder { + + private int nIn; + private int nOut; + private Activation activation = Activation.TANH; + private int[] kernel = new int[]{2,2}; + private int[] stride = new int[]{1,1}; + private int[] padding = new int[]{0,0}; + private ConvolutionMode cm = ConvolutionMode.Same; + + public Builder nIn(int nIn){ + this.nIn = nIn; + return this; + } + + public Builder nOut(int nOut){ + this.nOut = nOut; + return this; + } + + public Builder activation(Activation activation){ + this.activation = activation; + return this; + } + + public Builder kernel(int... k){ + this.kernel = k; + return this; + } + + public Builder stride(int... s){ + this.stride = s; + return this; + } + + public Builder padding(int... 
p){ + this.padding = p; + return this; + } + + public Builder convolutionMode(ConvolutionMode cm){ + this.cm = cm; + return this; + } + + @Override + public SameDiffConv build() { + return new SameDiffConv(this); + } + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java index 01dea6792bc1..fb0bbc512c22 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java @@ -78,6 +78,11 @@ public Map init(NeuralNetConfiguration conf, INDArray paramsVi //TODO log.warn("***** SameDiffParamInitializer: Parameter initialization not yet implemented *****"); } + + for(String s : sd.paramKeys()){ + conf.addVariable(s); + } + return out; } From c9bc902382127278aa546f7acf0845b929ee6d24 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Fri, 22 Dec 2017 18:31:08 +1100 Subject: [PATCH 09/34] More tests --- .../deeplearning4j/samediff/SameDiffTest.java | 20 +++++++++++-------- .../samediff/testlayers/SameDiffConv.java | 19 ++++++++++++------ 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java index 19e97224b2b2..d19f1c1c6c88 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java @@ -62,10 +62,14 @@ public void testSameDiffDenseForward() { int nOut = 4; Activation[] afns = new Activation[]{ - Activation.TANH, Activation.SIGMOID, - Activation.ELU, Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, -// Activation.CUBE, //Output differs -// Activation.HARDTANH, //NPE + Activation.TANH, + Activation.SIGMOID, + Activation.ELU, + Activation.IDENTITY, + Activation.SOFTPLUS, + Activation.SOFTSIGN, +// Activation.CUBE, //https://github.com/deeplearning4j/nd4j/issues/2426 + Activation.HARDTANH, //NPE // Activation.RELU //JVM crash }; @@ -118,10 +122,10 @@ public void testSameDiffDenseBackward() { Activation[] afns = new Activation[]{ Activation.TANH, -// Activation.SIGMOID, -// Activation.ELU, Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, -// Activation.CUBE, //Output differs -// Activation.HARDTANH, //NPE + Activation.SIGMOID, + Activation.ELU, Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, + Activation.HARDTANH, +// Activation.CUBE, //https://github.com/deeplearning4j/nd4j/issues/2426 // Activation.RELU //JVM crash }; diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java index 3df32d6a54e0..5d3a46200d51 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java @@ -12,17 +12,13 @@ import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; +import java.util.*; public class SameDiffConv extends BaseSameDiffLayer { private static final List WEIGHT_KEYS = 
Collections.singletonList(ConvolutionParamInitializer.WEIGHT_KEY); private static final List BIAS_KEYS = Collections.singletonList(ConvolutionParamInitializer.BIAS_KEY); - private int nIn; private int nOut; private Activation activation; @@ -31,6 +27,8 @@ public class SameDiffConv extends BaseSameDiffLayer { private int[] padding; private ConvolutionMode cm; + private Map paramShapes; + protected SameDiffConv(Builder b) { super(b); this.nIn = b.nIn; @@ -74,11 +72,20 @@ public List biasKeys() { @Override public Map paramShapes() { - return null; + if(paramShapes == null) { + int[] weightsShape = new int[]{nIn, nOut, kernel[0], kernel[1]}; + int[] biasShape = new int[]{1, nOut}; + Map m = new HashMap<>(); + m.put(ConvolutionParamInitializer.WEIGHT_KEY, weightsShape); + m.put(ConvolutionParamInitializer.BIAS_KEY, biasShape); + paramShapes = m; + } + return paramShapes; } @Override public List defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable) { +// sameDiff.conv2d() return null; } From e67ca0f1f1aef56ac2b9b3d898d74f2dc512bf77 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Fri, 22 Dec 2017 18:36:22 +1100 Subject: [PATCH 10/34] Test clean up --- .../deeplearning4j/samediff/SameDiffTest.java | 275 +----------------- 1 file changed, 9 insertions(+), 266 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java index d19f1c1c6c88..4e2e65ad0da1 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java @@ -171,278 +171,21 @@ public void testSameDiffDenseBackward() { Gradient g = net.gradient(); Gradient g2 = net2.gradient(); - assertEquals(g2.gradient(), g.gradient()); - } - } - } - @Test - public void testShapeResolutionMinus1() { + Map m1 = g.gradientForVariable(); + Map m2 = g2.gradientForVariable(); - int nIn = 3; - int nOut = 4; + assertEquals(m2.keySet(), m1.keySet()); - int minibatch = 3; + for(String s : m1.keySet()){ + INDArray i1 = m1.get(s); + INDArray i2 = m2.get(s); -// for(boolean useMinus1 : new boolean[]{false, true}) { - for (boolean useMinus1 : new boolean[]{true}) { - log.info("Starting: {}", (useMinus1 ? 
"minibatch -1" : "minibatch 3")); + assertEquals(s, i2, i1); + } - int[] inShape; - if (useMinus1) { - inShape = new int[]{-1, nIn}; - } else { - inShape = new int[]{minibatch, nIn}; + assertEquals(g2.gradient(), g.gradient()); } - int[] wShape = new int[]{nIn, nOut}; - int[] bShape = new int[]{1, nOut}; - - SameDiff sd = SameDiff.create(); - SDVariable layerInput = sd.var("in", inShape); - SDVariable weights = sd.var("W", wShape); - SDVariable bias = sd.var("b", bShape); - - SDVariable mmul = sd.mmul("mmul", layerInput, weights); - SDVariable z = mmul.add("z", bias); - SDVariable out = sd.sigmoid("out", z); - - INDArray in = Nd4j.rand(new int[]{minibatch, nIn}); - INDArray w = Nd4j.rand(wShape); - INDArray b = Nd4j.rand(bShape); - - Map m = new HashMap<>(); - m.put("in", in); - m.put("W", w); - m.put("b", b); - - sd.associateArrayWithVariable(in, sd.getVariable("in")); - sd.associateArrayWithVariable(w, sd.getVariable("W")); - sd.associateArrayWithVariable(b, sd.getVariable("b")); - -// INDArray outArr = sd.execAndEndResult(); - - sd.addAsPlaceHolder("in"); - sd.addAsPlaceHolder("W"); - sd.addAsPlaceHolder("b"); - - sd.execWithPlaceHolder(m); - - INDArray outArr = sd.getVariable("out").getArr(); - - assertArrayEquals(new int[]{minibatch, nOut}, outArr.shape()); } } - - @Test - public void debug() { - - int nIn = 3; - int nOut = 4; - - int minibatch = 3; - - int[] inShape = new int[]{-1, nIn}; - int[] wShape = new int[]{nIn, nOut}; - int[] bShape = new int[]{1, nOut}; - - SameDiff sd = SameDiff.create(); - SDVariable layerInput = sd.var("in", inShape); - SDVariable weights = sd.var("W", wShape); - SDVariable bias = sd.var("b", bShape); - - assertArrayEquals(inShape, layerInput.getShape()); - assertArrayEquals(wShape, weights.getShape()); - - SDVariable mmul = sd.mmul("mmul", layerInput, weights); - SDVariable z = mmul.add("z", bias); - SDVariable out = sd.sigmoid("out", z); - - INDArray in = Nd4j.rand(new int[]{minibatch, nIn}); - INDArray w = Nd4j.rand(wShape); - INDArray b = Nd4j.rand(bShape); - - Map m = new HashMap<>(); - m.put("in", in); - m.put("W", w); - m.put("b", b); - - sd.associateArrayWithVariable(in, sd.getVariable("in")); - sd.associateArrayWithVariable(w, sd.getVariable("W")); - sd.associateArrayWithVariable(b, sd.getVariable("b")); - -// INDArray outArr = sd.execAndEndResult(); - - sd.addAsPlaceHolder("in"); - sd.addAsPlaceHolder("W"); - sd.addAsPlaceHolder("b"); - - sd.execWithPlaceHolder(m); - - INDArray outArr = sd.getVariable("out").getArr(); - - assertArrayEquals(new int[]{minibatch, nOut}, outArr.shape()); - } - - @Test - public void debug2() { - int[] inShape = new int[]{-1, 3}; - - SameDiff sd = SameDiff.create(); - SDVariable layerInput = sd.var("in", inShape); - - int[] actShape = layerInput.getShape(); //Getting: [1,3] - assertArrayEquals(inShape, actShape); - } - - @Test - public void debugTransforms() { - - Activation[] afns = new Activation[]{ - //First 6 pass - Activation.TANH, Activation.SIGMOID, - Activation.ELU, Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, - //Next 3 fail - Activation.CUBE, //Output differs - Activation.HARDTANH, //NPE - Activation.RELU //JVM crash - }; - - int nIn = 3; - int nOut = 4; - - int minibatch = 3; - - int[] inShape = new int[]{minibatch, nIn}; - int[] wShape = new int[]{nIn, nOut}; - int[] bShape = new int[]{1, nOut}; - - for (Activation a : afns) { - log.info("Starting: " + a); - SameDiff sd = SameDiff.create(); - SDVariable layerInput = sd.var("in", inShape); - SDVariable weights = sd.var("W", wShape); - 
SDVariable bias = sd.var("b", bShape); - - SDVariable mmul = sd.mmul("mmul", layerInput, weights); - SDVariable z = mmul.add("z", bias); - - INDArray in = Nd4j.rand(new int[]{minibatch, nIn}); - INDArray w = Nd4j.rand(wShape); - INDArray b = Nd4j.rand(bShape); - - INDArray exp = in.mmul(w).addiRowVector(b); - - SDVariable out = asSameDiff(a, "out", sd, z, exp); - - Map m = new HashMap<>(); - m.put("in", in); - m.put("W", w); - m.put("b", b); - - sd.associateArrayWithVariable(in, sd.getVariable("in")); - sd.associateArrayWithVariable(w, sd.getVariable("W")); - sd.associateArrayWithVariable(b, sd.getVariable("b")); - - sd.addAsPlaceHolder("in"); - sd.addAsPlaceHolder("W"); - sd.addAsPlaceHolder("b"); - - sd.execWithPlaceHolder(m); - - INDArray outArr = sd.getVariable("out").getArr(); - - assertEquals(exp, outArr); - } - } - - public static SDVariable asSameDiff(Activation a, String variableName, SameDiff sd, SDVariable input, INDArray input2) { - switch (a) { - case CUBE: - Transforms.pow(input2, 3, false); - return sd.pow(variableName, input, 3.0); - case ELU: - Transforms.elu(input2, false); - return sd.elu(variableName, input); - case HARDTANH: - Transforms.hardTanh(input2, false); - return sd.hardTanh(variableName, input); - case IDENTITY: - return input.add(variableName, 0.0); //Hack to get new variable with same content - case LEAKYRELU: - Transforms.leakyRelu(input2, false); - return sd.leakyRelu(variableName, input, 0.0); - case RELU: - Transforms.relu(input2, false); - return sd.relu(variableName, input, 0.0); - case SIGMOID: - Transforms.sigmoid(input2, false); - return sd.sigmoid(variableName, input); - case SOFTMAX: - Transforms.softmax(input2, false); - return sd.softmax(variableName, input); - case SOFTPLUS: - Transforms.softPlus(input2, false); - return sd.softplus(variableName, input); - case SOFTSIGN: - Transforms.softsign(input2, false); - return sd.softsign(variableName, input); - case TANH: - Transforms.tanh(input2, false); - return sd.tanh(variableName, input); - case HARDSIGMOID: - case RATIONALTANH: - case RRELU: - case RECTIFIEDTANH: - case SELU: - case SWISH: - default: - throw new UnsupportedOperationException("Activation function not yet supported: " + a); - } - } - - - @Test - public void debugMmul() { - - INDArray first = Nd4j.linspace(1, 3, 3); - INDArray second = Nd4j.linspace(4, 7, 4); - - SameDiff sd = SameDiff.create(); - SDVariable f = sd.var("in1", first); - SDVariable s = sd.var("in2", second); - SDVariable fTranspose = sd.transpose(f); - SDVariable mmul = sd.mmul("mmul", fTranspose, s); - - INDArray out = sd.execAndEndResult(); - - INDArray exp = first.transpose().mmul(second); - assertEquals(exp, out); - } - - @Test - public void debugMmul2() { - //Here: [1,3]^T * [1,4] = [3,4] - - INDArray first = Nd4j.linspace(1, 3, 3); - INDArray second = Nd4j.linspace(4, 7, 4); - - SameDiff sd = SameDiff.create(); - SDVariable f = sd.var("in1", first); - SDVariable s = sd.var("in2", second); - - MMulTranspose mt = MMulTranspose.builder() - .transposeA(true) - .transposeB(false) - .transposeResult(false) - .a(first) - .b(second) - .build(); - SDVariable mmul = sd.f().mmul(f, s, mt); - sd.updateVariableNameAndReference(mmul, "mmul"); - - INDArray out = sd.execAndEndResult(); - - INDArray exp = first.transpose().mmul(second); - assertEquals(exp, out); - } } From 9e32a4494c577d986a13ec1c4f15acd2d8ecfdaf Mon Sep 17 00:00:00 2001 From: Alex Black Date: Fri, 22 Dec 2017 19:32:09 +1100 Subject: [PATCH 11/34] Fix + test serialization; add global config inheritance for 
samediff layers --- .../deeplearning4j/samediff/SameDiffTest.java | 6 +++ .../samediff/testlayers/SameDiffConv.java | 6 +++ .../samediff/testlayers/SameDiffDense.java | 30 ++++++++++--- .../nn/conf/NeuralNetConfiguration.java | 6 +++ .../layers/samediff/BaseSameDiffLayer.java | 42 +++++++++++++++++++ 5 files changed, 85 insertions(+), 5 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java index 4e2e65ad0da1..b7b35e370285 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java @@ -108,6 +108,12 @@ public void testSameDiffDenseForward() { INDArray outExp = net2.output(in); assertEquals(outExp, out); + + //Also check serialization: + MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net); + INDArray outLoaded = netLoaded.output(in); + + assertEquals(outExp, outLoaded); } } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java index 5d3a46200d51..368fa35ad556 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java @@ -2,6 +2,7 @@ import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.InputTypeUtil; @@ -89,6 +90,11 @@ public List defineLayer(SameDiff sameDiff, SDVariable layerInput, Map { private int nIn; diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java index 496686c40898..865cae9a8b9f 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java @@ -1,6 +1,10 @@ package org.deeplearning4j.samediff.testlayers; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Layer; import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; @@ -9,16 +13,20 @@ import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; +import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; import java.util.*; +@Data +@EqualsAndHashCode(callSuper = true, exclude = {"paramShapes"}) +@JsonIgnoreProperties("paramShapes") public class SameDiffDense extends BaseSameDiffLayer { private static final List W_KEYS = Collections.singletonList(DefaultParamInitializer.WEIGHT_KEY); private static final List B_KEYS = Collections.singletonList(DefaultParamInitializer.BIAS_KEY); private static final List PARAM_KEYS = Arrays.asList(DefaultParamInitializer.WEIGHT_KEY, DefaultParamInitializer.BIAS_KEY); - private final Map 
paramShapes; + private Map paramShapes; private int nIn; private int nOut; @@ -30,10 +38,10 @@ protected SameDiffDense(Builder builder) { nIn = builder.nIn; nOut = builder.nOut; activation = builder.activation; + } - paramShapes = new HashMap<>(); - paramShapes.put(DefaultParamInitializer.WEIGHT_KEY, new int[]{nIn, nOut}); - paramShapes.put(DefaultParamInitializer.BIAS_KEY, new int[]{1, nOut}); + private SameDiffDense(){ + //No op constructor for Jackson } @Override @@ -65,6 +73,11 @@ public List biasKeys() { @Override public Map paramShapes() { + if(paramShapes == null){ + paramShapes = new HashMap<>(); + paramShapes.put(DefaultParamInitializer.WEIGHT_KEY, new int[]{nIn, nOut}); + paramShapes.put(DefaultParamInitializer.BIAS_KEY, new int[]{1, nOut}); + } return paramShapes; } @@ -81,11 +94,18 @@ public List defineLayer(SameDiff sd, SDVariable layerInput, Map { private int nIn; private int nOut; - private Activation activation = Activation.TANH; + private Activation activation; public Builder nIn(int nIn){ this.nIn = nIn; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java index 420f294ca52e..7ea684361e00 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java @@ -34,6 +34,7 @@ import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.misc.FrozenLayer; import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional; +import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; import org.deeplearning4j.nn.conf.layers.variational.ReconstructionDistribution; import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; import org.deeplearning4j.nn.conf.serde.ComputationGraphConfigurationDeserializer; @@ -1077,6 +1078,11 @@ private void configureLayer(Layer layer) { else layerName = layer.getLayerName(); + if(layer instanceof BaseSameDiffLayer){ + BaseSameDiffLayer sdl = (BaseSameDiffLayer)layer; + sdl.applyGlobalConfig(this); + } + if (layer != null) { copyConfigToLayer(layerName, layer); } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java index 1e181ae62b38..09613d77b5a9 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java @@ -1,5 +1,7 @@ package org.deeplearning4j.nn.conf.layers.samediff; +import lombok.Data; +import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -19,6 +21,8 @@ import java.util.List; import java.util.Map; +@Data +@EqualsAndHashCode(callSuper = true) public abstract class BaseSameDiffLayer extends Layer { protected double l1; @@ -41,6 +45,10 @@ protected BaseSameDiffLayer(Builder builder){ this.biasUpdater = builder.biasUpdater; } + protected BaseSameDiffLayer(){ + //No op constructor for Jackson + } + @Override public abstract InputType getOutputType(int layerIndex, InputType inputType); @@ -58,6 +66,8 @@ protected BaseSameDiffLayer(Builder builder){ public abstract List defineLayer(SameDiff sameDiff, 
SDVariable layerInput, Map paramTable); + public abstract void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig); + //================================================================================================================== @Override @@ -87,6 +97,16 @@ public double getL2ByParam(String paramName) { return (initializer().isWeightParam(this, paramName) ? l2 : l2Bias); } + @Override + public IUpdater getUpdaterByParam(String paramName){ + if(biasUpdater != null && initializer().isBiasParam(this, paramName)){ + return biasUpdater; + } else if(initializer().isBiasParam(this, paramName) || initializer().isWeightParam(this, paramName)){ + return updater; + } + throw new IllegalStateException("Unknown parameter key: " + paramName); + } + @Override public boolean isPretrainParam(String paramName) { return false; @@ -107,6 +127,28 @@ public List paramKeys(){ return paramKeys; } + public void applyGlobalConfig(NeuralNetConfiguration.Builder b){ + if(Double.isNaN(l1)){ + l1 = b.getL1(); + } + if(Double.isNaN(l2)){ + l2 = b.getL2(); + } + if(Double.isNaN(l1Bias)){ + l1Bias = b.getL1Bias(); + } + if(Double.isNaN(l2Bias)){ + l2Bias = b.getL2Bias(); + } + if(updater == null){ + updater = b.getIUpdater(); + } + if(biasUpdater == null){ + biasUpdater = b.getBiasUpdater(); + } + + applyGlobalConfigToLayer(b); + } public static abstract class Builder> extends Layer.Builder { From b9e5b486afe99862a156070031f080cf4695ce66 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Fri, 22 Dec 2017 21:08:46 +1100 Subject: [PATCH 12/34] SameDiff Conv layer test; samediff param initializer reshape order config --- .../samediff/SameDiffGradTest.java | 64 ----- .../samediff/SameDiffTest1.java | 227 ------------------ .../samediff/SameDiffTestConv.java | 149 ++++++++++++ ...meDiffTest.java => SameDiffTestDense.java} | 2 +- .../samediff/testlayers/SameDiffConv.java | 81 +++++-- .../layers/samediff/BaseSameDiffLayer.java | 4 + .../nn/params/SameDiffParamInitializer.java | 9 +- 7 files changed, 221 insertions(+), 315 deletions(-) delete mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffGradTest.java delete mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java create mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestConv.java rename deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/{SameDiffTest.java => SameDiffTestDense.java} (99%) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffGradTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffGradTest.java deleted file mode 100644 index 34bcc022b73d..000000000000 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffGradTest.java +++ /dev/null @@ -1,64 +0,0 @@ -package org.deeplearning4j.samediff; - -import org.junit.Test; -import org.nd4j.autodiff.functions.DifferentialFunction; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.ops.transforms.Transforms; -import org.nd4j.linalg.primitives.Pair; - -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -import static org.junit.Assert.assertEquals; - -public class SameDiffGradTest { - - @Test - public void test1(){ - Nd4j.getRandom().setSeed(12345); - INDArray inArr = Nd4j.rand(1,4); - - SameDiff sd = SameDiff.create(); - SDVariable in = 
sd.var("in", inArr); - SDVariable s = sd.tanh("s", in); - - INDArray out = sd.execAndEndResult(); - INDArray outEx = Transforms.tanh(inArr, true); - - assertEquals(outEx, out); - System.out.println(out); - - System.out.println("------------------"); - - List vs = sd.variables(); - for(SDVariable sdv : vs){ -// if(sdv.getVarName().equals("in")){ -// System.out.println(sdv.getVarName() + "\n" + sdv.getArr()); -// } else { -// System.out.println(sdv.getVarName() + " - inputs: " + Arrays.toString(sd.getInputsForFunction(sdv)) + "\n" + sdv.getArr()); -// } - System.out.println(sdv.getVarName() + "\n" + sdv.getArr()); - } - - System.out.println("------------------"); - - Pair,List> p = sd.execBackwards(); - - System.out.println("------------------"); - - System.out.println("GRAD variables:"); - SameDiff grad = sd.getFunction("grad"); - for(SDVariable sdv : grad.variables()){ - System.out.println(sdv.getVarName() + " - inputs: " + Arrays.toString(sd.getInputsForFunction(sdv)) + "\n" + sdv.getArr()); - } - - System.out.println("------------------"); - - - } - -} diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java deleted file mode 100644 index ed75f28bf5a7..000000000000 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest1.java +++ /dev/null @@ -1,227 +0,0 @@ -package org.deeplearning4j.samediff; - -import org.junit.Test; -import org.nd4j.autodiff.functions.DifferentialFunction; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.ops.transforms.Transforms; -import org.nd4j.linalg.primitives.Pair; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -import static org.junit.Assert.assertEquals; - -public class SameDiffTest1 { - - @Test - public void test1() { - - SameDiff sd = SameDiff.create(); - - SDVariable input = sd.var("input", new int[]{3,4}); - SDVariable weights = sd.var("weights", new int[]{4,5}); - SDVariable bias = sd.var("bias", new int[]{1,5}); - - SDVariable mmul = sd.mmul("mmul", input, weights); - SDVariable z = mmul.add("z", bias); - SDVariable out = sd.sigmoid("out", z); - -// SDGraph g = sd.graph(); -// System.out.println(g); - - System.out.println(out); - - - INDArray iInput = Nd4j.rand(3,4); - INDArray iWeights = Nd4j.rand(4,5); - INDArray iBias = Nd4j.rand(1,5); - - INDArray iZ = iInput.mmul(iWeights).addiRowVector(iBias); - INDArray iOut = Transforms.sigmoid(iZ, true); - - Map values = new HashMap<>(); - values.put("input", iInput); - values.put("weights", iWeights); - values.put("bias", iBias); - - INDArray[] outAct = sd.eval(values); - - System.out.println(); - } - - - @Test - public void test2() { - - SameDiff sd = SameDiff.create(); - - SDVariable input = sd.var("input", new int[]{3,4}); - SDVariable weights = sd.var("weights", new int[]{4,5}); - SDVariable bias = sd.var("bias", new int[]{1,5}); - - SDVariable mmul = sd.mmul("mmul", input, weights); - SDVariable z = mmul.add("z", bias); - SDVariable out = sd.sigmoid("out", z); - -// SDGraph g = sd.graph(); -// System.out.println(g); - - System.out.println(out); - - - INDArray iInput = Nd4j.rand(3,4); - INDArray iWeights = Nd4j.rand(4,5); - INDArray iBias = Nd4j.rand(1,5); - - INDArray iZ = iInput.mmul(iWeights).addiRowVector(iBias); - 
INDArray iOut = Transforms.sigmoid(iZ, true); - - Map values = new HashMap<>(); - values.put("input", iInput); - values.put("weights", iWeights); - values.put("bias", iBias); - - INDArray[] outAct = sd.eval(values); - - System.out.println(); - } - - @Test - public void test3() { - - SameDiff sd = SameDiff.create(); - - INDArray iInput = Nd4j.rand(3,4); - INDArray iWeights = Nd4j.rand(4,5); - INDArray iBias = Nd4j.rand(1,5); - - SDVariable input = sd.var("input", iInput); - SDVariable weights = sd.var("weights", iWeights); - SDVariable bias = sd.var("bias", iBias); - - SDVariable mmul = sd.mmul("mmul", input, weights); - SDVariable z = mmul.add("z", bias); - SDVariable out = sd.sigmoid("out", z); - - - INDArray outAct = sd.execAndEndResult(); - - - - INDArray iZ = iInput.mmul(iWeights).addiRowVector(iBias); - INDArray iOut = Transforms.sigmoid(iZ, true); - - Map values = new HashMap<>(); - values.put("input", iInput); - values.put("weights", iWeights); - values.put("bias", iBias); - - System.out.println(); - } - - - @Test - public void test4() { - Nd4j.getRandom().setSeed(12345); - - SameDiff sd = SameDiff.create(); - - INDArray iInput = Nd4j.rand(3,4); - INDArray iWeights = Nd4j.rand(4,5); - INDArray iBias = Nd4j.zeros(1, 5); //Nd4j.rand(1,5); - - SDVariable input = sd.var("input", iInput); - SDVariable weights = sd.var("weights", iWeights); - SDVariable bias = sd.var("bias", iBias); - - SDVariable mmul = sd.mmul("mmul", input, weights); - SDVariable z = mmul.add("z", bias); - SDVariable out = sd.sigmoid("out", z); - - -// INDArray outArr = out.eval(); - Pair, List> m = sd.exec(); - - for(Map.Entry e : m.getFirst().entrySet()){ - System.out.println(e.getKey().getVarName()); - System.out.println(e.getKey().getArr()); - } - - System.out.println("------------\nAll variable values"); - - List variables = sd.variables(); - for(SDVariable s : variables){ - System.out.println(s.getVarName()); - System.out.println(s.getArr()); - } - - System.out.println("------------"); - - INDArray exp = iInput.mmul(iWeights).addiRowVector(iBias); - - System.out.println("Input:"); - System.out.println(iInput); - System.out.println("Weights:"); - System.out.println(iWeights); - System.out.println("Bias:"); - System.out.println(iBias); - - System.out.println("------------"); - - System.out.println("Expected:"); - System.out.println(exp); - System.out.println("Actual:"); -// System.out.println(outArr); -// System.out.println(Arrays.toString(outArr.dup().data().asFloat())); - } - - - @Test - public void test5() { - Nd4j.getRandom().setSeed(12345); - - SameDiff sd = SameDiff.create(); - - INDArray iInput = Nd4j.rand(3,4); - INDArray iWeights = Nd4j.rand(4,5); - INDArray iBias = Nd4j.rand(1,5); - - SDVariable input = sd.var("input", iInput); - SDVariable weights = sd.var("weights", iWeights); - SDVariable bias = sd.var("bias", iBias); - - SDVariable mmul = sd.mmul("mmul", input, weights); - SDVariable z = mmul.add("z", bias); - SDVariable out = sd.sigmoid("out", z); - - System.out.println("------------\nAll variable values"); - - sd.exec(); - - List variables = sd.variables(); - for(SDVariable s : variables){ - System.out.println(s.getVarName()); - System.out.println(s.getArr()); - System.out.println("Data buffer: " + Arrays.toString(s.getArr().data().asFloat())); - } - - System.out.println("------------"); - - List varNames = variables.stream().map(SDVariable::getVarName).collect(Collectors.toList()); - System.out.println("VarNames: " + varNames); //"z" and "out" appear twice - - INDArray expMmul = 
iInput.mmul(iWeights); - INDArray expZ = expMmul.addRowVector(iBias); - INDArray expOut = Transforms.sigmoid(expZ, true); - - assertEquals(expMmul, mmul.getArr()); - assertEquals(expZ, z.getArr()); - assertEquals(expOut, out.getArr()); - } -} diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestConv.java new file mode 100644 index 000000000000..47191cb3ab83 --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestConv.java @@ -0,0 +1,149 @@ +package org.deeplearning4j.samediff; + +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.TestUtils; +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; +import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.nn.params.ConvolutionParamInitializer; +import org.deeplearning4j.samediff.testlayers.SameDiffConv; +import org.deeplearning4j.samediff.testlayers.SameDiffDense; +import org.junit.Test; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.lossfunctions.LossFunctions; + +import java.util.Arrays; +import java.util.Map; + +import static org.junit.Assert.*; + +@Slf4j +public class SameDiffTestConv { + + @Test + public void testSameDiffConvBasic() { + + int nIn = 3; + int nOut = 4; + int kH = 2; + int kW = 3; + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new SameDiffConv.Builder().nIn(nIn).nOut(nOut).kernelSize(kH, kW).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + Map pt1 = net.getLayer(0).paramTable(); + assertNotNull(pt1); + assertEquals(2, pt1.size()); + assertNotNull(pt1.get(ConvolutionParamInitializer.WEIGHT_KEY)); + assertNotNull(pt1.get(ConvolutionParamInitializer.BIAS_KEY)); + + assertArrayEquals(new int[]{nOut, nIn, kH, kW}, pt1.get(ConvolutionParamInitializer.WEIGHT_KEY).shape()); + assertArrayEquals(new int[]{1, nOut}, pt1.get(ConvolutionParamInitializer.BIAS_KEY).shape()); + } + + @Test + public void testSameDiffConvForward() { + + int count = 0; + for (int minibatch : new int[]{5, 1}) { + + Activation[] afns = new Activation[]{ + Activation.TANH, + Activation.SIGMOID, + Activation.ELU, + Activation.IDENTITY, + Activation.SOFTPLUS, + Activation.SOFTSIGN, +// Activation.CUBE, //https://github.com/deeplearning4j/nd4j/issues/2426 + Activation.HARDTANH, //NPE +// Activation.RELU //JVM crash + }; + + for(int nIn : new int[]{3,4}){ + for( int nOut : new int[]{4,5}){ + for( int[] kernel : new int[][]{{2,2}, {2,1}, {3,2}}){ + for( int[] strides : new int[][]{{1,1}, {2,2}, {2,1}}){ + for( int[] dilation : new int[][]{{1,1}, {2,2}, {1,2}}){ + for(ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same}){ + for(Activation a : afns){ + String msg = "Test " + (count++) + " - minibatch=" + minibatch + ", nIn=" + nIn + + ", nOut=" + nOut + ", kernel=" + Arrays.toString(kernel) + ", stride=" + + Arrays.toString(strides) + ", dilation=" + Arrays.toString(dilation) + + ", ConvolutionMode=" + cm + ", 
ActFn=" + a; + log.info("Starting test: " + msg); + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new SameDiffConv.Builder() + .nIn(nIn) + .nOut(nOut) + .kernelSize(kernel) + .stride(strides) + .dilation(dilation) + .convolutionMode(cm) + .activation(a) + .build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + assertNotNull(net.paramTable()); + + MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + .list() + .layer(new ConvolutionLayer.Builder() + .nIn(nIn) + .nOut(nOut) + .kernelSize(kernel) + .stride(strides) + .dilation(dilation) + .convolutionMode(cm) + .activation(a) + .build()) + .build(); + + MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); + net2.init(); + + net.params().assign(net2.params()); + + //Check params: + assertEquals(msg, net2.params(), net.params()); + Map params1 = net.paramTable(); + Map params2 = net2.paramTable(); + assertEquals(msg, params2, params1); + + INDArray in = Nd4j.rand(minibatch, nIn); + INDArray out = net.output(in); + INDArray outExp = net2.output(in); + + assertEquals(msg, outExp, out); + + //Also check serialization: + MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net); + INDArray outLoaded = netLoaded.output(in); + + assertEquals(msg, outExp, outLoaded); + } + } + } + } + } + } + } + } + } +} diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestDense.java similarity index 99% rename from deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java rename to deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestDense.java index b7b35e370285..624455e57ced 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestDense.java @@ -28,7 +28,7 @@ import static org.junit.Assert.assertNotNull; @Slf4j -public class SameDiffTest { +public class SameDiffTestDense { @Test public void testSameDiffDenseBasic() { diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java index 368fa35ad556..a2bad93ed201 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java @@ -12,6 +12,7 @@ import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv2DConfig; import java.util.*; @@ -19,6 +20,8 @@ public class SameDiffConv extends BaseSameDiffLayer { private static final List WEIGHT_KEYS = Collections.singletonList(ConvolutionParamInitializer.WEIGHT_KEY); private static final List BIAS_KEYS = Collections.singletonList(ConvolutionParamInitializer.BIAS_KEY); + //Order to match 'vanilla' conv layer implementation, for easy comparison + private static final List PARAM_KEYS = Arrays.asList(ConvolutionParamInitializer.BIAS_KEY, ConvolutionParamInitializer.WEIGHT_KEY); private int nIn; private int nOut; @@ -27,8 +30,9 @@ public class SameDiffConv extends BaseSameDiffLayer { private int[] stride; private int[] padding; private ConvolutionMode cm; + private int[] dilation; - private 
Map paramShapes; + private Map paramShapes; protected SameDiffConv(Builder b) { super(b); @@ -39,12 +43,13 @@ protected SameDiffConv(Builder b) { this.stride = b.stride; this.padding = b.padding; this.cm = b.cm; + this.dilation = b.dilation; } @Override public InputType getOutputType(int layerIndex, InputType inputType) { - InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional)inputType; - return InputTypeUtil.getOutputTypeCnnLayers(inputType, kernel, stride, padding, new int[]{1,1}, + InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; + return InputTypeUtil.getOutputTypeCnnLayers(inputType, kernel, stride, padding, new int[]{1, 1}, cm, nOut, layerIndex, getLayerName(), SameDiffConv.class); } @@ -71,12 +76,23 @@ public List biasKeys() { return BIAS_KEYS; } + @Override + public List paramKeys() { + return PARAM_KEYS; + } + + @Override + public char paramReshapeOrder(String param) { + //To match DL4J + return 'c'; + } + @Override public Map paramShapes() { - if(paramShapes == null) { - int[] weightsShape = new int[]{nIn, nOut, kernel[0], kernel[1]}; + if (paramShapes == null) { + int[] weightsShape = new int[]{nOut, nIn, kernel[0], kernel[1]}; int[] biasShape = new int[]{1, nOut}; - Map m = new HashMap<>(); + Map m = new HashMap<>(); m.put(ConvolutionParamInitializer.WEIGHT_KEY, weightsShape); m.put(ConvolutionParamInitializer.BIAS_KEY, biasShape); paramShapes = m; @@ -86,13 +102,34 @@ public Map paramShapes() { @Override public List defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable) { -// sameDiff.conv2d() - return null; + + SDVariable w = paramTable.get(ConvolutionParamInitializer.WEIGHT_KEY); + SDVariable b = paramTable.get(ConvolutionParamInitializer.BIAS_KEY); + + SDVariable[] vars = new SDVariable[]{layerInput, w, b}; + + Conv2DConfig c = Conv2DConfig.builder() + .kh(kernel[0]).kw(kernel[1]) + .ph(padding[0]).pw(padding[1]) + .sy(stride[0]).sx(stride[1]) + .dh(dilation[0]).dw(dilation[1]) + .build(); + + SDVariable conv = sameDiff.conv2d(vars, c); //TODO can't set name + + SDVariable out = activation.asSameDiff("out", sameDiff, conv); + + return Collections.singletonList("out"); } @Override public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) { - throw new UnsupportedOperationException(); + if (activation == null) { + activation = Activation.fromIActivation(globalConfig.getActivationFn()); + } + if (cm == null) { + cm = globalConfig.getConvolutionMode(); + } } public static class Builder extends BaseSameDiffLayer.Builder { @@ -100,46 +137,52 @@ public static class Builder extends BaseSameDiffLayer.Builder { private int nIn; private int nOut; private Activation activation = Activation.TANH; - private int[] kernel = new int[]{2,2}; - private int[] stride = new int[]{1,1}; - private int[] padding = new int[]{0,0}; + private int[] kernel = new int[]{2, 2}; + private int[] stride = new int[]{1, 1}; + private int[] padding = new int[]{0, 0}; + private int[] dilation = new int[]{1, 1}; private ConvolutionMode cm = ConvolutionMode.Same; - public Builder nIn(int nIn){ + public Builder nIn(int nIn) { this.nIn = nIn; return this; } - public Builder nOut(int nOut){ + public Builder nOut(int nOut) { this.nOut = nOut; return this; } - public Builder activation(Activation activation){ + public Builder activation(Activation activation) { this.activation = activation; return this; } - public Builder kernel(int... k){ + public Builder kernelSize(int... 
k) { this.kernel = k; return this; } - public Builder stride(int... s){ + public Builder stride(int... s) { this.stride = s; return this; } - public Builder padding(int... p){ + public Builder padding(int... p) { this.padding = p; return this; } - public Builder convolutionMode(ConvolutionMode cm){ + public Builder convolutionMode(ConvolutionMode cm) { this.cm = cm; return this; } + public Builder dilation(int... d) { + this.dilation = d; + return this; + } + @Override public SameDiffConv build() { return new SameDiffConv(this); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java index 09613d77b5a9..d7b41b57e1fd 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java @@ -127,6 +127,10 @@ public List paramKeys(){ return paramKeys; } + public char paramReshapeOrder(String param){ + return 'f'; + } + public void applyGlobalConfig(NeuralNetConfiguration.Builder b){ if(Double.isNaN(l1)){ l1 = b.getL1(); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java index fb0bbc512c22..63bf62728c37 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java @@ -73,7 +73,7 @@ public boolean isBiasParam(Layer layer, String key) { @Override public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { BaseSameDiffLayer sd = (BaseSameDiffLayer) conf.getLayer(); - Map out = subsetAndReshape(sd.paramKeys(), sd.paramShapes(), paramsView); + Map out = subsetAndReshape(sd.paramKeys(), sd.paramShapes(), paramsView, sd); if(initializeParams){ //TODO log.warn("***** SameDiffParamInitializer: Parameter initialization not yet implemented *****"); @@ -89,10 +89,11 @@ public Map init(NeuralNetConfiguration conf, INDArray paramsVi @Override public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { BaseSameDiffLayer sd = (BaseSameDiffLayer) conf.getLayer(); - return subsetAndReshape(sd.paramKeys(), sd.paramShapes(), gradientView); + return subsetAndReshape(sd.paramKeys(), sd.paramShapes(), gradientView, sd); } - private Map subsetAndReshape(List params, Map paramShapes, INDArray view){ + private Map subsetAndReshape(List params, Map paramShapes, INDArray view, + BaseSameDiffLayer sdl){ Map out = new LinkedHashMap<>(); int soFar = 0; for(String s : params){ @@ -100,7 +101,7 @@ private Map subsetAndReshape(List params, Map Date: Fri, 22 Dec 2017 23:15:42 +1100 Subject: [PATCH 13/34] More samediff conv --- .../samediff/SameDiffTestConv.java | 45 +++++++++++++++++++ .../samediff/testlayers/SameDiffConv.java | 26 +++++++++-- 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestConv.java index 47191cb3ab83..132e9fb3b11c 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestConv.java @@ 
-14,8 +14,11 @@ import org.deeplearning4j.samediff.testlayers.SameDiffConv; import org.deeplearning4j.samediff.testlayers.SameDiffDense; import org.junit.Test; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv2DConfig; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.LossFunctions; @@ -94,6 +97,7 @@ public void testSameDiffConvForward() { .dilation(dilation) .convolutionMode(cm) .activation(a) + .hasBias(false) //TODO TEST BOTH CASES .build()) .build(); @@ -112,6 +116,7 @@ public void testSameDiffConvForward() { .dilation(dilation) .convolutionMode(cm) .activation(a) + .hasBias(false) //TODO TEST BOTH CASES .build()) .build(); @@ -146,4 +151,44 @@ public void testSameDiffConvForward() { } } } + + @Test + public void testConv2dBasic(){ + int nIn = 3; + int nOut = 4; + int kH = 2; + int kW = 2; + + int mb = 3; + int imgH = 28; + int imgW = 28; + + SameDiff sd = SameDiff.create(); + INDArray wArr = Nd4j.create(nOut, nIn, kH, kW); //As per DL4J + INDArray bArr = Nd4j.create(1, nOut); + INDArray inArr = Nd4j.create(mb, nIn, imgH, imgW); + + SDVariable in = sd.var("in", inArr); + SDVariable w = sd.var("W", wArr); + SDVariable b = sd.var("b", bArr); + + //Order: https://github.com/deeplearning4j/libnd4j/blob/6c41ea5528bb1f454e92a9da971de87b93ff521f/include/ops/declarable/generic/convo/conv2d.cpp#L20-L22 + //in, w, b - bias is optional + SDVariable[] vars = new SDVariable[]{in, w, b}; + + Conv2DConfig c = Conv2DConfig.builder() + .kh(kH).kw(kW) + .ph(0).pw(0) + .sy(1).sx(1) + .dh(1).dw(1) + .isSameMode(false) + .build(); + + SDVariable out = sd.conv2d(vars, c); + + INDArray outArr = sd.execAndEndResult(); + //Expected output size: out = (in - k + 2*p)/s + 1 = (28-2+0)/1+1 = 27 + int[] outShape = outArr.shape(); + assertArrayEquals(new int[]{mb, nOut, 27, 27}, outShape); + } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java index a2bad93ed201..238f1196def6 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java @@ -31,6 +31,7 @@ public class SameDiffConv extends BaseSameDiffLayer { private int[] padding; private ConvolutionMode cm; private int[] dilation; + private boolean hasBias; private Map paramShapes; @@ -44,6 +45,7 @@ protected SameDiffConv(Builder b) { this.padding = b.padding; this.cm = b.cm; this.dilation = b.dilation; + this.hasBias = b.hasBias; } @Override @@ -73,12 +75,20 @@ public List weightKeys() { @Override public List biasKeys() { - return BIAS_KEYS; + if(hasBias) { + return BIAS_KEYS; + } else { + return Collections.emptyList(); + } } @Override public List paramKeys() { - return PARAM_KEYS; + if(hasBias) { + return PARAM_KEYS; + } else { + return WEIGHT_KEYS; + } } @Override @@ -91,10 +101,12 @@ public char paramReshapeOrder(String param) { public Map paramShapes() { if (paramShapes == null) { int[] weightsShape = new int[]{nOut, nIn, kernel[0], kernel[1]}; - int[] biasShape = new int[]{1, nOut}; Map m = new HashMap<>(); m.put(ConvolutionParamInitializer.WEIGHT_KEY, weightsShape); - m.put(ConvolutionParamInitializer.BIAS_KEY, biasShape); + if(hasBias) { + int[] 
biasShape = new int[]{1, nOut}; + m.put(ConvolutionParamInitializer.BIAS_KEY, biasShape); + } paramShapes = m; } return paramShapes; @@ -142,6 +154,7 @@ public static class Builder extends BaseSameDiffLayer.Builder { private int[] padding = new int[]{0, 0}; private int[] dilation = new int[]{1, 1}; private ConvolutionMode cm = ConvolutionMode.Same; + private boolean hasBias = true; public Builder nIn(int nIn) { this.nIn = nIn; @@ -183,6 +196,11 @@ public Builder dilation(int... d) { return this; } + public Builder hasBias(boolean hasBias){ + this.hasBias = hasBias; + return this; + } + @Override public SameDiffConv build() { return new SameDiffConv(this); From 177536447cfd04e8abc903c3722ef216e8463932 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Thu, 28 Dec 2017 21:30:43 +1100 Subject: [PATCH 14/34] Add SameDiffLayerUtils --- ...iffTestConv.java => TestSameDiffConv.java} | 2 +- ...fTestDense.java => TestSameDiffDense.java} | 2 +- .../samediff/testlayers/SameDiffConv.java | 3 +- .../samediff/testlayers/SameDiffDense.java | 3 +- .../layers/samediff/SameDiffLayerUtils.java | 28 +++++++++++++++++++ 5 files changed, 34 insertions(+), 4 deletions(-) rename deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/{SameDiffTestConv.java => TestSameDiffConv.java} (99%) rename deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/{SameDiffTestDense.java => TestSameDiffDense.java} (99%) create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayerUtils.java diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java similarity index 99% rename from deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestConv.java rename to deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java index 132e9fb3b11c..aaaa969c91fb 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java @@ -28,7 +28,7 @@ import static org.junit.Assert.*; @Slf4j -public class SameDiffTestConv { +public class TestSameDiffConv { @Test public void testSameDiffConvBasic() { diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestDense.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffDense.java similarity index 99% rename from deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestDense.java rename to deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffDense.java index 624455e57ced..b292f8bc85d3 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/SameDiffTestDense.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffDense.java @@ -28,7 +28,7 @@ import static org.junit.Assert.assertNotNull; @Slf4j -public class SameDiffTestDense { +public class TestSameDiffDense { @Test public void testSameDiffDenseBasic() { diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java index 238f1196def6..de8a691a2194 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java @@ 
-7,6 +7,7 @@ import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.InputTypeUtil; import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; +import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; import org.deeplearning4j.util.ConvolutionUtils; import org.nd4j.autodiff.samediff.SDVariable; @@ -137,7 +138,7 @@ public List defineLayer(SameDiff sameDiff, SDVariable layerInput, Map defineLayer(SameDiff sd, SDVariable layerInput, Map, Activation> activationMap; + + private SameDiffLayerUtils(){ } + + public static Activation fromIActivation(IActivation a){ + + if(activationMap == null){ + Map,Activation> m = new HashMap<>(); + for(Activation act : Activation.values()){ + m.put(act.getActivationFunction().getClass(), act); + } + activationMap = m; + } + + return activationMap.get(a.getClass()); + } + +} From 84299fa9bb27ed16b39f3cd0f3d207cb728f76ff Mon Sep 17 00:00:00 2001 From: Alex Black Date: Fri, 29 Dec 2017 11:26:44 +1100 Subject: [PATCH 15/34] SameDiff dense: multi-layer forward test --- .../samediff/TestSameDiffDense.java | 72 ++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffDense.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffDense.java index b292f8bc85d3..4712155574a2 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffDense.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffDense.java @@ -69,7 +69,7 @@ public void testSameDiffDenseForward() { Activation.SOFTPLUS, Activation.SOFTSIGN, // Activation.CUBE, //https://github.com/deeplearning4j/nd4j/issues/2426 - Activation.HARDTANH, //NPE + Activation.HARDTANH, // Activation.RELU //JVM crash }; @@ -118,6 +118,76 @@ public void testSameDiffDenseForward() { } } + @Test + public void testSameDiffDenseForwardMultiLayer() { + + for (int minibatch : new int[]{5, 1}) { + int nIn = 3; + int nOut = 4; + + Activation[] afns = new Activation[]{ + Activation.TANH, + Activation.SIGMOID, + Activation.ELU, + Activation.IDENTITY, + Activation.SOFTPLUS, + Activation.SOFTSIGN, +// Activation.CUBE, //https://github.com/deeplearning4j/nd4j/issues/2426 + Activation.HARDTANH, +// Activation.RELU //JVM crash + }; + + for (Activation a : afns) { + log.info("Starting test - " + a); + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) + .activation(a).build()) + .layer(new SameDiffDense.Builder().nIn(nOut).nOut(nOut) + .activation(a).build()) + .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut) + .activation(a).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + assertNotNull(net.paramTable()); + + MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + .list() + .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) + .layer(new DenseLayer.Builder().activation(a).nIn(nOut).nOut(nOut).build()) + .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut) + .activation(a).build()) + .build(); + + MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); + net2.init(); + + net.params().assign(net2.params()); + + //Check params: + assertEquals(net2.params(), net.params()); + Map params1 = net.paramTable(); + Map params2 = net2.paramTable(); + assertEquals(params2, params1); + 
+ INDArray in = Nd4j.rand(minibatch, nIn); + INDArray out = net.output(in); + INDArray outExp = net2.output(in); + + assertEquals(outExp, out); + + //Also check serialization: + MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net); + INDArray outLoaded = netLoaded.output(in); + + assertEquals(outExp, outLoaded); + } + } + } + @Test public void testSameDiffDenseBackward() { From 310ee106b259c101f1da26881b9d9a43608c954e Mon Sep 17 00:00:00 2001 From: Alex Black Date: Fri, 29 Dec 2017 11:37:32 +1100 Subject: [PATCH 16/34] JSON/equals for for SameDiffConv --- .../samediff/TestSameDiffConv.java | 140 +++++++++--------- .../samediff/testlayers/SameDiffConv.java | 10 ++ 2 files changed, 82 insertions(+), 68 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java index aaaa969c91fb..7b610ad8c885 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java @@ -54,6 +54,8 @@ public void testSameDiffConvBasic() { assertArrayEquals(new int[]{nOut, nIn, kH, kW}, pt1.get(ConvolutionParamInitializer.WEIGHT_KEY).shape()); assertArrayEquals(new int[]{1, nOut}, pt1.get(ConvolutionParamInitializer.BIAS_KEY).shape()); + + TestUtils.testModelSerialization(net); } @Test @@ -74,74 +76,76 @@ public void testSameDiffConvForward() { // Activation.RELU //JVM crash }; - for(int nIn : new int[]{3,4}){ - for( int nOut : new int[]{4,5}){ - for( int[] kernel : new int[][]{{2,2}, {2,1}, {3,2}}){ - for( int[] strides : new int[][]{{1,1}, {2,2}, {2,1}}){ - for( int[] dilation : new int[][]{{1,1}, {2,2}, {1,2}}){ - for(ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same}){ - for(Activation a : afns){ - String msg = "Test " + (count++) + " - minibatch=" + minibatch + ", nIn=" + nIn - + ", nOut=" + nOut + ", kernel=" + Arrays.toString(kernel) + ", stride=" - + Arrays.toString(strides) + ", dilation=" + Arrays.toString(dilation) - + ", ConvolutionMode=" + cm + ", ActFn=" + a; - log.info("Starting test: " + msg); - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() - .layer(new SameDiffConv.Builder() - .nIn(nIn) - .nOut(nOut) - .kernelSize(kernel) - .stride(strides) - .dilation(dilation) - .convolutionMode(cm) - .activation(a) - .hasBias(false) //TODO TEST BOTH CASES - .build()) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - assertNotNull(net.paramTable()); - - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() - .list() - .layer(new ConvolutionLayer.Builder() - .nIn(nIn) - .nOut(nOut) - .kernelSize(kernel) - .stride(strides) - .dilation(dilation) - .convolutionMode(cm) - .activation(a) - .hasBias(false) //TODO TEST BOTH CASES - .build()) - .build(); - - MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); - net2.init(); - - net.params().assign(net2.params()); - - //Check params: - assertEquals(msg, net2.params(), net.params()); - Map params1 = net.paramTable(); - Map params2 = net2.paramTable(); - assertEquals(msg, params2, params1); - - INDArray in = Nd4j.rand(minibatch, nIn); - INDArray out = net.output(in); - INDArray outExp = net2.output(in); - - assertEquals(msg, outExp, out); - - //Also check serialization: - MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net); - INDArray outLoaded = netLoaded.output(in); 
- - assertEquals(msg, outExp, outLoaded); + for(boolean hasBias : new boolean[]{true, false}) { + for (int nIn : new int[]{3, 4}) { + for (int nOut : new int[]{4, 5}) { + for (int[] kernel : new int[][]{{2, 2}, {2, 1}, {3, 2}}) { + for (int[] strides : new int[][]{{1, 1}, {2, 2}, {2, 1}}) { + for (int[] dilation : new int[][]{{1, 1}, {2, 2}, {1, 2}}) { + for (ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same}) { + for (Activation a : afns) { + String msg = "Test " + (count++) + " - minibatch=" + minibatch + ", nIn=" + nIn + + ", nOut=" + nOut + ", kernel=" + Arrays.toString(kernel) + ", stride=" + + Arrays.toString(strides) + ", dilation=" + Arrays.toString(dilation) + + ", ConvolutionMode=" + cm + ", ActFn=" + a + ", hasBias=" + hasBias; + log.info("Starting test: " + msg); + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new SameDiffConv.Builder() + .nIn(nIn) + .nOut(nOut) + .kernelSize(kernel) + .stride(strides) + .dilation(dilation) + .convolutionMode(cm) + .activation(a) + .hasBias(hasBias) + .build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + assertNotNull(net.paramTable()); + + MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + .list() + .layer(new ConvolutionLayer.Builder() + .nIn(nIn) + .nOut(nOut) + .kernelSize(kernel) + .stride(strides) + .dilation(dilation) + .convolutionMode(cm) + .activation(a) + .hasBias(hasBias) + .build()) + .build(); + + MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); + net2.init(); + + net.params().assign(net2.params()); + + //Check params: + assertEquals(msg, net2.params(), net.params()); + Map params1 = net.paramTable(); + Map params2 = net2.paramTable(); + assertEquals(msg, params2, params1); + + INDArray in = Nd4j.rand(minibatch, nIn); + INDArray out = net.output(in); + INDArray outExp = net2.output(in); + + assertEquals(msg, outExp, out); + + //Also check serialization: + MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net); + INDArray outLoaded = netLoaded.output(in); + + assertEquals(msg, outExp, outLoaded); + } } } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java index de8a691a2194..5be9895cbfde 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java @@ -1,5 +1,7 @@ package org.deeplearning4j.samediff.testlayers; +import lombok.Data; +import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -14,9 +16,13 @@ import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv2DConfig; +import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; import java.util.*; +@Data +@EqualsAndHashCode(callSuper = true, exclude = {"paramShapes"}) +@JsonIgnoreProperties({"paramShapes"}) public class SameDiffConv extends BaseSameDiffLayer { private static final List WEIGHT_KEYS = Collections.singletonList(ConvolutionParamInitializer.WEIGHT_KEY); @@ -49,6 +55,10 @@ protected SameDiffConv(Builder b) { this.hasBias = b.hasBias; } + private SameDiffConv(){ + //No arg constructor for 
Jackson/JSON serialization + } + @Override public InputType getOutputType(int layerIndex, InputType inputType) { InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; From 6e3620922fb75d542ab6c7eb488210b6f8617abd Mon Sep 17 00:00:00 2001 From: Alex Black Date: Fri, 29 Dec 2017 17:44:41 +1100 Subject: [PATCH 17/34] Refactoring, first steps for SameDiff output layer --- .../samediff/testlayers/SameDiffOutput.java | 213 ++++++++++++++++++ .../samediff/AbstractSameDiffLayer.java | 213 ++++++++++++++++++ .../layers/samediff/BaseSameDiffLayer.java | 169 +------------- .../samediff/BaseSameDiffOutputLayer.java | 52 +++++ .../layers/samediff/SameDiffOutputLayer.java | 117 ++++++++++ 5 files changed, 597 insertions(+), 167 deletions(-) create mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffOutput.java create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffOutputLayer.java create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffOutput.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffOutput.java new file mode 100644 index 000000000000..3a9a3f6a172c --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffOutput.java @@ -0,0 +1,213 @@ +package org.deeplearning4j.samediff.testlayers; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; +import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffOutputLayer; +import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils; +import org.deeplearning4j.nn.params.DefaultParamInitializer; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.ops.LossFunction; +import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; + +import java.util.*; + +@Data +@EqualsAndHashCode(callSuper = true, exclude = {"paramShapes"}) +@JsonIgnoreProperties("paramShapes") +public class SameDiffOutput extends BaseSameDiffOutputLayer { + + private static final List W_KEYS = Collections.singletonList(DefaultParamInitializer.WEIGHT_KEY); + private static final List B_KEYS = Collections.singletonList(DefaultParamInitializer.BIAS_KEY); + private static final List PARAM_KEYS = Arrays.asList(DefaultParamInitializer.WEIGHT_KEY, DefaultParamInitializer.BIAS_KEY); + + private Map paramShapes; + + private int nIn; + private int nOut; + private Activation activation; + private LossFunctions.LossFunction lossFn; + + protected SameDiffOutput(Builder builder) { + super(builder); + + nIn = builder.nIn; + nOut = builder.nOut; + activation = builder.activation; + lossFn = builder.lossFn; + } + + private SameDiffOutput(){ + //No op constructor for Jackson + } + + @Override + public String outputActivationsKey() { + return "out"; + } + + @Override + public String lossKey() { + return "loss"; + } + + @Override + public 
int[] labelShape() { + return new int[]{-1, nOut}; + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + return null; + } + + @Override + public void setNIn(InputType inputType, boolean override) { + if(override){ + this.nIn = ((InputType.InputTypeFeedForward)inputType).getSize(); + } + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return null; + } + + @Override + public List weightKeys() { + return W_KEYS; + } + + @Override + public List biasKeys() { + return B_KEYS; + } + + @Override + public Map paramShapes() { + if(paramShapes == null){ + paramShapes = new HashMap<>(); + paramShapes.put(DefaultParamInitializer.WEIGHT_KEY, new int[]{nIn, nOut}); + paramShapes.put(DefaultParamInitializer.BIAS_KEY, new int[]{1, nOut}); + } + return paramShapes; + } + + @Override + public List defineLayer(SameDiff sd, SDVariable layerInput, SDVariable layerLabel, Map paramTable) { + SDVariable weights = paramTable.get(DefaultParamInitializer.WEIGHT_KEY); + SDVariable bias = paramTable.get(DefaultParamInitializer.BIAS_KEY); + + SDVariable mmul = sd.mmul("mmul", layerInput, weights); + SDVariable z = mmul.add("z", bias); + SDVariable out = activation.asSameDiff("out", sd, z); + +// //TODO for now: Calculate MSE only +// SDVariable diff = out.sub(layerLabel); + int[] labelShape = labelShape(); +// SDVariable sqDiff = diff.mul(diff); +// SDVariable mse = sd.loss + + String lossKey = lossKey(); + SDVariable loss; + int d = 1; + switch (lossFn){ + case MSE: + loss = sd.lossMSE( lossKey, out, layerLabel, d); + break; + case L1: + loss = sd.lossL1( lossKey, out, layerLabel, d); + break; + case XENT: + loss = sd.lossBinaryXENT( lossKey, out, layerLabel, d); + break; + case MCXENT: + loss = sd.lossMCXENT( lossKey, out, layerLabel, d); + break; + case SQUARED_LOSS: + loss = sd.lossMSE( lossKey + "-pre", out, layerLabel, d).mul( lossKey, labelShape[1]); + break; + case NEGATIVELOGLIKELIHOOD: + loss = sd.lossNegativeLogLikelihood( lossKey, out, layerLabel, d); + break; + case HINGE: + loss = sd.lossHinge( lossKey, out, layerLabel, d); + break; + case SQUARED_HINGE: + loss = sd.lossSquaredHinge( lossKey, out, layerLabel, d); + break; + case KL_DIVERGENCE: + loss = sd.lossKLD( lossKey, out, layerLabel, d); + break; + case MEAN_ABSOLUTE_ERROR: + loss = sd.lossMAE( lossKey, out, layerLabel, d); + break; + case L2: + loss = sd.lossL2( lossKey, out, layerLabel, d); + break; + case MEAN_SQUARED_LOGARITHMIC_ERROR: + loss = sd.lossMSLE( lossKey, out, layerLabel, d); + break; + case POISSON: + loss = sd.lossPoisson( lossKey, out, layerLabel, d); + break; + case EXPLL: + case RMSE_XENT: + case RECONSTRUCTION_CROSSENTROPY: + case CUSTOM: + case COSINE_PROXIMITY: + case MEAN_ABSOLUTE_PERCENTAGE_ERROR: + default: + throw new UnsupportedOperationException("Unsupported loss function: " + lossFn); + } + + + return Collections.singletonList("out"); + } + + @Override + public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) { + if(activation == null){ + activation = SameDiffLayerUtils.fromIActivation(globalConfig.getActivationFn()); + } + } + + public static class Builder extends BaseSameDiffOutputLayer.Builder { + + private int nIn; + private int nOut; + private Activation activation; + private LossFunctions.LossFunction lossFn = LossFunctions.LossFunction.MSE; + + public Builder nIn(int nIn){ + this.nIn = nIn; + return this; + } + + public Builder nOut(int nOut){ + this.nOut = nOut; + return this; + } + + public Builder 
activation(Activation activation){ + this.activation = activation; + return this; + } + + public Builder lossFunction(LossFunctions.LossFunction lossFn){ + this.lossFn = lossFn; + return this; + } + + @Override + public SameDiffOutput build() { + return new SameDiffOutput(this); + } + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java new file mode 100644 index 000000000000..43100855c5e2 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java @@ -0,0 +1,213 @@ +package org.deeplearning4j.nn.conf.layers.samediff; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.layers.samediff.SameDiffLayer; +import org.deeplearning4j.nn.params.SameDiffParamInitializer; +import org.deeplearning4j.optimize.api.IterationListener; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.learning.config.IUpdater; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +@Data +@EqualsAndHashCode(callSuper = true) +public abstract class AbstractSameDiffLayer extends Layer { + + protected double l1; + protected double l2; + protected double l1Bias; + protected double l2Bias; + protected IUpdater updater; + protected IUpdater biasUpdater; + + + private List paramKeys; + + protected AbstractSameDiffLayer(Builder builder){ + super(builder); + this.l1 = builder.l1; + this.l2 = builder.l2; + this.l1Bias = builder.l1Bias; + this.l2Bias = builder.l2Bias; + this.updater = builder.updater; + this.biasUpdater = builder.biasUpdater; + } + + protected AbstractSameDiffLayer(){ + //No op constructor for Jackson + } + + @Override + public abstract InputType getOutputType(int layerIndex, InputType inputType); + + @Override + public abstract void setNIn(InputType inputType, boolean override); + + @Override + public abstract InputPreProcessor getPreProcessorForInputType(InputType inputType); + + public abstract List weightKeys(); + + public abstract List biasKeys(); + + public abstract Map paramShapes(); + + public abstract void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig); + + @Override + public abstract org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, + int layerIndex, INDArray layerParamsView, boolean initializeParams); + + //================================================================================================================== + + @Override + public ParamInitializer initializer() { + return SameDiffParamInitializer.getInstance(); + } + + @Override + public double getL1ByParam(String paramName) { + return (initializer().isWeightParam(this, paramName) ? l1 : l1Bias); + } + + @Override + public double getL2ByParam(String paramName) { + return (initializer().isWeightParam(this, paramName) ? 
l2 : l2Bias); + } + + @Override + public IUpdater getUpdaterByParam(String paramName){ + if(biasUpdater != null && initializer().isBiasParam(this, paramName)){ + return biasUpdater; + } else if(initializer().isBiasParam(this, paramName) || initializer().isWeightParam(this, paramName)){ + return updater; + } + throw new IllegalStateException("Unknown parameter key: " + paramName); + } + + @Override + public boolean isPretrainParam(String paramName) { + return false; + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + return new LayerMemoryReport(); //TODO + } + + public List paramKeys(){ + if(paramKeys == null){ + List pk = new ArrayList<>(); + pk.addAll(weightKeys()); + pk.addAll(biasKeys()); + paramKeys = pk; + } + return paramKeys; + } + + public char paramReshapeOrder(String param){ + return 'f'; + } + + public void applyGlobalConfig(NeuralNetConfiguration.Builder b){ + if(Double.isNaN(l1)){ + l1 = b.getL1(); + } + if(Double.isNaN(l2)){ + l2 = b.getL2(); + } + if(Double.isNaN(l1Bias)){ + l1Bias = b.getL1Bias(); + } + if(Double.isNaN(l2Bias)){ + l2Bias = b.getL2Bias(); + } + if(updater == null){ + updater = b.getIUpdater(); + } + if(biasUpdater == null){ + biasUpdater = b.getBiasUpdater(); + } + + applyGlobalConfigToLayer(b); + } + + public static abstract class Builder> extends Layer.Builder { + + protected double l1 = Double.NaN; + protected double l2 = Double.NaN; + protected double l1Bias = Double.NaN; + protected double l2Bias = Double.NaN; + protected IUpdater updater = null; + protected IUpdater biasUpdater = null; + + /** + * L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1 regularization + * coefficient for the bias. + */ + public T l1(double l1) { + this.l1 = l1; + return (T) this; + } + + /** + * L2 regularization coefficient (weights only). Use {@link #l2Bias(double)} to configure the l2 regularization + * coefficient for the bias. + */ + public T l2(double l2) { + this.l2 = l2; + return (T) this; + } + + /** + * L1 regularization coefficient for the bias. Default: 0. See also {@link #l1(double)} + */ + public T l1Bias(double l1Bias) { + this.l1Bias = l1Bias; + return (T) this; + } + + /** + * L2 regularization coefficient for the bias. Default: 0. See also {@link #l2(double)} + */ + public T l2Bias(double l2Bias) { + this.l2Bias = l2Bias; + return (T) this; + } + + /** + * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} + * or {@link org.nd4j.linalg.learning.config.Nesterovs} + * + * @param updater Updater to use + */ + public T updater(IUpdater updater) { + this.updater = updater; + return (T) this; + } + + /** + * Gradient updater configuration, for the biases only. 
If not set, biases will use the updater as + * set by {@link #updater(IUpdater)} + * + * @param biasUpdater Updater to use for bias parameters + */ + public T biasUpdater(IUpdater biasUpdater){ + this.biasUpdater = biasUpdater; + return (T) this; + } + + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java index d7b41b57e1fd..8104d2598075 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java @@ -23,51 +23,19 @@ @Data @EqualsAndHashCode(callSuper = true) -public abstract class BaseSameDiffLayer extends Layer { +public abstract class BaseSameDiffLayer extends AbstractSameDiffLayer { - protected double l1; - protected double l2; - protected double l1Bias; - protected double l2Bias; - protected IUpdater updater; - protected IUpdater biasUpdater; - - - private List paramKeys; protected BaseSameDiffLayer(Builder builder){ super(builder); - this.l1 = builder.l1; - this.l2 = builder.l2; - this.l1Bias = builder.l1Bias; - this.l2Bias = builder.l2Bias; - this.updater = builder.updater; - this.biasUpdater = builder.biasUpdater; } protected BaseSameDiffLayer(){ //No op constructor for Jackson } - @Override - public abstract InputType getOutputType(int layerIndex, InputType inputType); - - @Override - public abstract void setNIn(InputType inputType, boolean override); - - @Override - public abstract InputPreProcessor getPreProcessorForInputType(InputType inputType); - - public abstract List weightKeys(); - - public abstract List biasKeys(); - - public abstract Map paramShapes(); - public abstract List defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable); - public abstract void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig); - //================================================================================================================== @Override @@ -82,142 +50,9 @@ public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, return ret; } - @Override - public ParamInitializer initializer() { - return SameDiffParamInitializer.getInstance(); - } - - @Override - public double getL1ByParam(String paramName) { - return (initializer().isWeightParam(this, paramName) ? l1 : l1Bias); - } - - @Override - public double getL2ByParam(String paramName) { - return (initializer().isWeightParam(this, paramName) ? 
l2 : l2Bias); - } - - @Override - public IUpdater getUpdaterByParam(String paramName){ - if(biasUpdater != null && initializer().isBiasParam(this, paramName)){ - return biasUpdater; - } else if(initializer().isBiasParam(this, paramName) || initializer().isWeightParam(this, paramName)){ - return updater; - } - throw new IllegalStateException("Unknown parameter key: " + paramName); - } - - @Override - public boolean isPretrainParam(String paramName) { - return false; - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - return new LayerMemoryReport(); //TODO - } - - public List paramKeys(){ - if(paramKeys == null){ - List pk = new ArrayList<>(); - pk.addAll(weightKeys()); - pk.addAll(biasKeys()); - paramKeys = pk; - } - return paramKeys; - } - - public char paramReshapeOrder(String param){ - return 'f'; - } - - public void applyGlobalConfig(NeuralNetConfiguration.Builder b){ - if(Double.isNaN(l1)){ - l1 = b.getL1(); - } - if(Double.isNaN(l2)){ - l2 = b.getL2(); - } - if(Double.isNaN(l1Bias)){ - l1Bias = b.getL1Bias(); - } - if(Double.isNaN(l2Bias)){ - l2Bias = b.getL2Bias(); - } - if(updater == null){ - updater = b.getIUpdater(); - } - if(biasUpdater == null){ - biasUpdater = b.getBiasUpdater(); - } - - applyGlobalConfigToLayer(b); - } - - public static abstract class Builder> extends Layer.Builder { - - protected double l1 = Double.NaN; - protected double l2 = Double.NaN; - protected double l1Bias = Double.NaN; - protected double l2Bias = Double.NaN; - protected IUpdater updater = null; - protected IUpdater biasUpdater = null; - - /** - * L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1 regularization - * coefficient for the bias. - */ - public T l1(double l1) { - this.l1 = l1; - return (T) this; - } - - /** - * L2 regularization coefficient (weights only). Use {@link #l2Bias(double)} to configure the l2 regularization - * coefficient for the bias. - */ - public T l2(double l2) { - this.l2 = l2; - return (T) this; - } - - /** - * L1 regularization coefficient for the bias. Default: 0. See also {@link #l1(double)} - */ - public T l1Bias(double l1Bias) { - this.l1Bias = l1Bias; - return (T) this; - } - - /** - * L2 regularization coefficient for the bias. Default: 0. See also {@link #l2(double)} - */ - public T l2Bias(double l2Bias) { - this.l2Bias = l2Bias; - return (T) this; - } + public static abstract class Builder> extends AbstractSameDiffLayer.Builder { - /** - * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} - * or {@link org.nd4j.linalg.learning.config.Nesterovs} - * - * @param updater Updater to use - */ - public T updater(IUpdater updater) { - this.updater = updater; - return (T) this; - } - /** - * Gradient updater configuration, for the biases only. 
If not set, biases will use the updater as - * set by {@link #updater(IUpdater)} - * - * @param biasUpdater Updater to use for bias parameters - */ - public T biasUpdater(IUpdater biasUpdater){ - this.biasUpdater = biasUpdater; - return (T) this; - } } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffOutputLayer.java new file mode 100644 index 000000000000..d11876dd81cf --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffOutputLayer.java @@ -0,0 +1,52 @@ +package org.deeplearning4j.nn.conf.layers.samediff; + +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.layers.samediff.SameDiffOutputLayer; +import org.deeplearning4j.optimize.api.IterationListener; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.api.ndarray.INDArray; + +import java.util.Collection; +import java.util.List; +import java.util.Map; + +public abstract class BaseSameDiffOutputLayer extends AbstractSameDiffLayer { + + protected BaseSameDiffOutputLayer(Builder builder){ + super(builder); + } + + protected BaseSameDiffOutputLayer(){ + //No arg for JSON/Jackson + } + + public abstract String outputActivationsKey(); + + public abstract String lossKey(); + + public abstract int[] labelShape(); + + + public abstract List defineLayer(SameDiff sameDiff, SDVariable layerInput, SDVariable layerLabel, Map paramTable); + + @Override + public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, + int layerIndex, INDArray layerParamsView, boolean initializeParams) { + SameDiffOutputLayer ret = new SameDiffOutputLayer(conf); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setConf(conf); + return ret; + } + + public static abstract class Builder> extends AbstractSameDiffLayer.Builder { + + + + + } + +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java new file mode 100644 index 000000000000..3cf475f17bf7 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java @@ -0,0 +1,117 @@ +package org.deeplearning4j.nn.layers.samediff; + +import org.deeplearning4j.nn.api.layers.IOutputLayer; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.dataset.api.DataSet; +import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +public class SameDiffOutputLayer extends SameDiffLayer implements IOutputLayer { + + + public SameDiffOutputLayer(NeuralNetConfiguration conf) { + super(conf); + } + + @Override + public void setLabels(INDArray labels) { + + } + + @Override + public INDArray getLabels() { + return null; + } + + @Override + public double computeScore(double fullNetworkL1, double fullNetworkL2, boolean training) { + return 0; + } + + @Override + public INDArray 
computeScoreForExamples(double fullNetworkL1, double fullNetworkL2) { + return null; + } + + @Override + public double f1Score(DataSet data) { + throw new UnsupportedOperationException(); + } + + @Override + public double f1Score(INDArray examples, INDArray labels) { + throw new UnsupportedOperationException(); + } + + @Override + public int numLabels() { + throw new UnsupportedOperationException(); + } + + @Override + public void fit(DataSetIterator iter) { + throw new UnsupportedOperationException(); + } + + @Override + public int[] predict(INDArray examples) { + throw new UnsupportedOperationException(); + } + + @Override + public List predict(DataSet dataSet) { + throw new UnsupportedOperationException(); + } + + @Override + public INDArray labelProbabilities(INDArray examples) { + throw new UnsupportedOperationException(); + } + + @Override + public void fit(INDArray examples, INDArray labels) { + throw new UnsupportedOperationException(); + } + + @Override + public void fit(DataSet data) { + throw new UnsupportedOperationException(); + } + + @Override + public void fit(INDArray examples, int[] labels) { + throw new UnsupportedOperationException(); + } + + protected void doInit(){ + sameDiff = SameDiff.create(); + Map p = paramTable(); + + int[] inputShape = input.shape().clone(); +// inputShape[0] = -1; //TODO THIS DOESN'T ENABLE VARIABLE SIZE MINIBATCHES + SDVariable inputVar = sameDiff.var(INPUT_KEY, inputShape); + Map paramShapes = layerConf().paramShapes(); + Map params = new LinkedHashMap<>(); + for(String s : layerConf().paramKeys()){ + int[] ps = paramShapes.get(s); + SDVariable v = sameDiff.var(s, ps); + params.put(s, v); + } + List outputKeys = layerConf().defineLayer(sameDiff, inputVar, params); + if(outputKeys == null || outputKeys.size() != 1){ + throw new IllegalStateException("Invalid output keys: " + outputKeys); + } + + for(Map.Entry e : p.entrySet()){ + sameDiff.associateArrayWithVariable(e.getValue(), sameDiff.getVariable(e.getKey())); + } + + this.outputKeys = outputKeys; + } +} From f1e15ed469957ec894d560fc9bf9e000ddc09388 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Fri, 29 Dec 2017 21:32:43 +1100 Subject: [PATCH 18/34] SameDiffOutput layer tests; fixes/improvements --- .../samediff/TestSameDiffOutput.java | 144 ++++++++++++++++++ .../samediff/testlayers/SameDiffOutput.java | 41 +++-- .../samediff/BaseSameDiffOutputLayer.java | 9 +- .../nn/layers/samediff/SameDiffLayer.java | 8 +- .../layers/samediff/SameDiffOutputLayer.java | 10 +- .../nn/params/SameDiffParamInitializer.java | 17 +-- 6 files changed, 201 insertions(+), 28 deletions(-) create mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffOutput.java diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffOutput.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffOutput.java new file mode 100644 index 000000000000..bd85ca953f45 --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffOutput.java @@ -0,0 +1,144 @@ +package org.deeplearning4j.samediff; + +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.TestUtils; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import 
org.deeplearning4j.nn.params.DefaultParamInitializer; +import org.deeplearning4j.samediff.testlayers.SameDiffDense; +import org.deeplearning4j.samediff.testlayers.SameDiffOutput; +import org.junit.Test; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.lossfunctions.LossFunctions; + +import java.util.Map; + +import static org.junit.Assert.*; + +@Slf4j +public class TestSameDiffOutput { + + @Test + public void testSameDiffOutputBasic() { + + int nIn = 3; + int nOut = 4; + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new SameDiffOutput.Builder().nIn(nIn).nOut(nOut) + .activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + Map pt1 = net.getLayer(0).paramTable(); + assertNotNull(pt1); + assertEquals(2, pt1.size()); + assertNotNull(pt1.get(DefaultParamInitializer.WEIGHT_KEY)); + assertNotNull(pt1.get(DefaultParamInitializer.BIAS_KEY)); + + assertArrayEquals(new int[]{nIn, nOut}, pt1.get(DefaultParamInitializer.WEIGHT_KEY).shape()); + assertArrayEquals(new int[]{1, nOut}, pt1.get(DefaultParamInitializer.BIAS_KEY).shape()); + + INDArray in = Nd4j.create(3, nIn); + INDArray out = net.output(in); + assertArrayEquals(new int[]{3, nOut}, out.shape()); + } + + @Test + public void testPlaceholderReduceSimple(){ + + SameDiff sd = SameDiff.create(); + SDVariable v = sd.var("in", new int[]{-1,10}); + SDVariable vSum = sd.sum(v, 1); + + } + + @Test + public void testSameDiffOutputForward() { + + for (int minibatch : new int[]{5, 1}) { + int nIn = 3; + int nOut = 4; + + LossFunctions.LossFunction[] lossFns = new LossFunctions.LossFunction[]{ + LossFunctions.LossFunction.MSE, +// LossFunctions.LossFunction.MCXENT, +// LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, +// LossFunctions.LossFunction.L2, +// LossFunctions.LossFunction.SQUARED_LOSS, +// LossFunctions.LossFunction.KL_DIVERGENCE, +// LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR, +// LossFunctions.LossFunction.XENT, +// LossFunctions.LossFunction.MEAN_SQUARED_LOGARITHMIC_ERROR + }; + + Activation[] afns = new Activation[]{ + Activation.TANH, //MSE +// Activation.SOFTMAX, //MCXENT +// Activation.SOFTMAX, //NLL +// Activation.SOFTPLUS, //L2 +// Activation.TANH, //Squared loss +// Activation.SIGMOID, //KLD +// Activation.TANH, //Squared loss +// Activation.SIGMOID //MSLE + }; + + for( int i=0; i params1 = net.paramTable(); + Map params2 = net2.paramTable(); + assertEquals(params2, params1); + + INDArray in = Nd4j.rand(minibatch, nIn); + INDArray out = net.output(in); + INDArray outExp = net2.output(in); + + assertEquals(outExp, out); + + //Also check serialization: + MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net); + INDArray outLoaded = netLoaded.output(in); + + assertEquals(outExp, outLoaded); + } + } + } +} diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffOutput.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffOutput.java index 3a9a3f6a172c..0d6873e43889 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffOutput.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffOutput.java @@ -14,6 +14,7 @@ import 
org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ops.LossFunction; import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; import java.util.*; @@ -27,7 +28,7 @@ public class SameDiffOutput extends BaseSameDiffOutputLayer { private static final List B_KEYS = Collections.singletonList(DefaultParamInitializer.BIAS_KEY); private static final List PARAM_KEYS = Arrays.asList(DefaultParamInitializer.WEIGHT_KEY, DefaultParamInitializer.BIAS_KEY); - private Map paramShapes; + private Map paramShapes; private int nIn; private int nOut; @@ -43,7 +44,7 @@ protected SameDiffOutput(Builder builder) { lossFn = builder.lossFn; } - private SameDiffOutput(){ + private SameDiffOutput() { //No op constructor for Jackson } @@ -53,8 +54,8 @@ public String outputActivationsKey() { } @Override - public String lossKey() { - return "loss"; + public Pair lossKeys() { + return new Pair<>("lossPerEx", "score"); } @Override @@ -69,8 +70,8 @@ public InputType getOutputType(int layerIndex, InputType inputType) { @Override public void setNIn(InputType inputType, boolean override) { - if(override){ - this.nIn = ((InputType.InputTypeFeedForward)inputType).getSize(); + if (override) { + this.nIn = ((InputType.InputTypeFeedForward) inputType).getSize(); } } @@ -91,7 +92,7 @@ public List biasKeys() { @Override public Map paramShapes() { - if(paramShapes == null){ + if (paramShapes == null) { paramShapes = new HashMap<>(); paramShapes.put(DefaultParamInitializer.WEIGHT_KEY, new int[]{nIn, nOut}); paramShapes.put(DefaultParamInitializer.BIAS_KEY, new int[]{1, nOut}); @@ -110,12 +111,13 @@ public List defineLayer(SameDiff sd, SDVariable layerInput, SDVariable l // //TODO for now: Calculate MSE only // SDVariable diff = out.sub(layerLabel); - int[] labelShape = labelShape(); // SDVariable sqDiff = diff.mul(diff); // SDVariable mse = sd.loss - String lossKey = lossKey(); + int[] labelShape = labelShape(); + Pair lossKeys = lossKeys(); SDVariable loss; + /* int d = 1; switch (lossFn){ case MSE: @@ -165,6 +167,17 @@ public List defineLayer(SameDiff sd, SDVariable layerInput, SDVariable l case MEAN_ABSOLUTE_PERCENTAGE_ERROR: default: throw new UnsupportedOperationException("Unsupported loss function: " + lossFn); + }*/ + + switch (lossFn) { + case MSE: + SDVariable diff = out.sub(layerLabel); + SDVariable sqDiff = diff.mul(diff); + SDVariable mse = sd.mean(lossKeys.getFirst(), sqDiff, 1); + SDVariable score = sd.mean(lossKeys.getSecond(), mse); + break; + default: + throw new UnsupportedOperationException("Not yet implemented: " + lossFn); } @@ -173,7 +186,7 @@ public List defineLayer(SameDiff sd, SDVariable layerInput, SDVariable l @Override public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) { - if(activation == null){ + if (activation == null) { activation = SameDiffLayerUtils.fromIActivation(globalConfig.getActivationFn()); } } @@ -185,22 +198,22 @@ public static class Builder extends BaseSameDiffOutputLayer.Builder { private Activation activation; private LossFunctions.LossFunction lossFn = LossFunctions.LossFunction.MSE; - public Builder nIn(int nIn){ + public Builder nIn(int nIn) { this.nIn = nIn; return this; } - public Builder nOut(int nOut){ + public Builder nOut(int nOut) { this.nOut = nOut; return this; } - public Builder activation(Activation activation){ + public Builder activation(Activation activation) { this.activation = activation; return this; } - public Builder 
lossFunction(LossFunctions.LossFunction lossFn){ + public Builder lossFunction(LossFunctions.LossFunction lossFn) { this.lossFn = lossFn; return this; } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffOutputLayer.java index d11876dd81cf..768927715a12 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffOutputLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffOutputLayer.java @@ -6,6 +6,7 @@ import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.util.Collection; import java.util.List; @@ -23,7 +24,13 @@ protected BaseSameDiffOutputLayer(){ public abstract String outputActivationsKey(); - public abstract String lossKey(); + /** + * Two keys: + * First - For the score *per example* (1 value per example) + * Second - for the average score (1 values for all examples) + * @return + */ + public abstract Pair lossKeys(); public abstract int[] labelShape(); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java index bcd64b1fed48..c5db84cb7964 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java @@ -2,6 +2,7 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.samediff.AbstractSameDiffLayer; import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -18,9 +19,9 @@ import java.util.List; import java.util.Map; -public class SameDiffLayer extends AbstractLayer { +public class SameDiffLayer extends AbstractLayer { - private static final String INPUT_KEY = "input"; + public static final String INPUT_KEY = "input"; protected SameDiff sameDiff; protected List outputKeys; @@ -181,6 +182,7 @@ public Map paramTable(boolean backpropParamsOnly) { } protected void doInit(){ + BaseSameDiffLayer bl = (BaseSameDiffLayer)layerConf(); sameDiff = SameDiff.create(); Map p = paramTable(); @@ -194,7 +196,7 @@ protected void doInit(){ SDVariable v = sameDiff.var(s, ps); params.put(s, v); } - List outputKeys = layerConf().defineLayer(sameDiff, inputVar, params); + List outputKeys = bl.defineLayer(sameDiff, inputVar, params); if(outputKeys == null || outputKeys.size() != 1){ throw new IllegalStateException("Invalid output keys: " + outputKeys); } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java index 3cf475f17bf7..60df946004a2 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java @@ -2,6 +2,7 @@ import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import 
org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffOutputLayer; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.ndarray.INDArray; @@ -14,6 +15,9 @@ public class SameDiffOutputLayer extends SameDiffLayer implements IOutputLayer { + public static final String LABEL_KEY = "label"; + + public SameDiffOutputLayer(NeuralNetConfiguration conf) { super(conf); @@ -90,12 +94,16 @@ public void fit(INDArray examples, int[] labels) { } protected void doInit(){ + BaseSameDiffOutputLayer ol = ((BaseSameDiffOutputLayer)layerConf()); + sameDiff = SameDiff.create(); Map p = paramTable(); int[] inputShape = input.shape().clone(); + int[] labelShape = ol.labelShape(); // inputShape[0] = -1; //TODO THIS DOESN'T ENABLE VARIABLE SIZE MINIBATCHES SDVariable inputVar = sameDiff.var(INPUT_KEY, inputShape); + SDVariable labelVar = sameDiff.var(LABEL_KEY, labelShape); Map paramShapes = layerConf().paramShapes(); Map params = new LinkedHashMap<>(); for(String s : layerConf().paramKeys()){ @@ -103,7 +111,7 @@ protected void doInit(){ SDVariable v = sameDiff.var(s, ps); params.put(s, v); } - List outputKeys = layerConf().defineLayer(sameDiff, inputVar, params); + List outputKeys = ol.defineLayer(sameDiff, inputVar, labelVar, params); if(outputKeys == null || outputKeys.size() != 1){ throw new IllegalStateException("Invalid output keys: " + outputKeys); } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java index 63bf62728c37..bfa7c686539b 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java @@ -4,8 +4,7 @@ import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.Layer; -import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; -import org.deeplearning4j.nn.weights.WeightInitUtil; +import org.deeplearning4j.nn.conf.layers.samediff.AbstractSameDiffLayer; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.util.ArrayUtil; @@ -33,7 +32,7 @@ public int numParams(NeuralNetConfiguration conf) { @Override public int numParams(Layer layer) { - BaseSameDiffLayer sd = (BaseSameDiffLayer)layer; + AbstractSameDiffLayer sd = (AbstractSameDiffLayer)layer; Map m = sd.paramShapes(); int n = 0; for(int[] arr : m.values()){ @@ -44,19 +43,19 @@ public int numParams(Layer layer) { @Override public List paramKeys(Layer layer) { - BaseSameDiffLayer sd = (BaseSameDiffLayer)layer; + AbstractSameDiffLayer sd = (AbstractSameDiffLayer)layer; return sd.paramKeys(); } @Override public List weightKeys(Layer layer) { - BaseSameDiffLayer sd = (BaseSameDiffLayer)layer; + AbstractSameDiffLayer sd = (AbstractSameDiffLayer)layer; return sd.weightKeys(); } @Override public List biasKeys(Layer layer) { - BaseSameDiffLayer sd = (BaseSameDiffLayer)layer; + AbstractSameDiffLayer sd = (AbstractSameDiffLayer)layer; return sd.biasKeys(); } @@ -72,7 +71,7 @@ public boolean isBiasParam(Layer layer, String key) { @Override public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - BaseSameDiffLayer sd = (BaseSameDiffLayer) conf.getLayer(); + AbstractSameDiffLayer sd = (AbstractSameDiffLayer) conf.getLayer(); Map out = subsetAndReshape(sd.paramKeys(), 
sd.paramShapes(), paramsView, sd); if(initializeParams){ //TODO @@ -88,12 +87,12 @@ public Map init(NeuralNetConfiguration conf, INDArray paramsVi @Override public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { - BaseSameDiffLayer sd = (BaseSameDiffLayer) conf.getLayer(); + AbstractSameDiffLayer sd = (AbstractSameDiffLayer) conf.getLayer(); return subsetAndReshape(sd.paramKeys(), sd.paramShapes(), gradientView, sd); } private Map subsetAndReshape(List params, Map paramShapes, INDArray view, - BaseSameDiffLayer sdl){ + AbstractSameDiffLayer sdl){ Map out = new LinkedHashMap<>(); int soFar = 0; for(String s : params){ From 18ce864db56e5de38183ea9db173e5dbcedb7362 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Tue, 2 Jan 2018 15:18:47 +1100 Subject: [PATCH 19/34] Additional tests and fixes --- .../deeplearning4j/nn/misc/TestLrChanges.java | 1 + .../samediff/TestSameDiffConv.java | 92 ++++++++++++++- .../samediff/TestSameDiffOutput.java | 109 +++++++++++++++++- 3 files changed, 190 insertions(+), 12 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java index 85f2034aab27..6b200d806059 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java @@ -6,6 +6,7 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.conf.weightnoise.DropConnect; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.Test; diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java index 7b610ad8c885..d090e6805dd0 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java @@ -58,9 +58,85 @@ public void testSameDiffConvBasic() { TestUtils.testModelSerialization(net); } + @Test + public void testSameDiffConvForward_Debug() { + + int imgH = 16; + int imgW = 24; + int count = 0; + int minibatch = 5; + boolean hasBias = true; + int nIn = 3; + int nOut = 4; + int[] kernel = {2, 2}; + int[] strides = {1, 1}; + int[] dilation = {1, 1}; + ConvolutionMode cm = ConvolutionMode.Truncate; + Activation a = Activation.TANH; + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new SameDiffConv.Builder() + .nIn(nIn) + .nOut(nOut) + .kernelSize(kernel) + .stride(strides) + .dilation(dilation) + .convolutionMode(cm) + .activation(a) + .hasBias(hasBias) + .build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + assertNotNull(net.paramTable()); + + MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + .list() + .layer(new ConvolutionLayer.Builder() + .nIn(nIn) + .nOut(nOut) + .kernelSize(kernel) + .stride(strides) + .dilation(dilation) + .convolutionMode(cm) + .activation(a) + .hasBias(hasBias) + .build()) + .build(); + + MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); + net2.init(); + + net.params().assign(net2.params()); + + //Check params: + assertEquals(net2.params(), net.params()); + Map 
params1 = net.paramTable(); + Map params2 = net2.paramTable(); + assertEquals(params2, params1); + + INDArray in = Nd4j.rand(new int[]{minibatch, nIn, imgH, imgW}); + INDArray out = net.output(in); + INDArray outExp = net2.output(in); + + assertEquals(outExp, out); + + //Also check serialization: + MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net); + INDArray outLoaded = netLoaded.output(in); + + assertEquals(outExp, outLoaded); + } + @Test public void testSameDiffConvForward() { + int imgH = 8; + int imgW = 12; + int count = 0; for (int minibatch : new int[]{5, 1}) { @@ -72,11 +148,11 @@ public void testSameDiffConvForward() { Activation.SOFTPLUS, Activation.SOFTSIGN, // Activation.CUBE, //https://github.com/deeplearning4j/nd4j/issues/2426 - Activation.HARDTANH, //NPE -// Activation.RELU //JVM crash + Activation.HARDTANH, + Activation.RELU }; - for(boolean hasBias : new boolean[]{true, false}) { + for (boolean hasBias : new boolean[]{true, false}) { for (int nIn : new int[]{3, 4}) { for (int nOut : new int[]{4, 5}) { for (int[] kernel : new int[][]{{2, 2}, {2, 1}, {3, 2}}) { @@ -134,7 +210,7 @@ public void testSameDiffConvForward() { Map params2 = net2.paramTable(); assertEquals(msg, params2, params1); - INDArray in = Nd4j.rand(minibatch, nIn); + INDArray in = Nd4j.rand(new int[]{minibatch, nIn, imgH, imgW}); INDArray out = net.output(in); INDArray outExp = net2.output(in); @@ -157,7 +233,7 @@ public void testSameDiffConvForward() { } @Test - public void testConv2dBasic(){ + public void testConv2dBasic() { int nIn = 3; int nOut = 4; int kH = 2; @@ -188,7 +264,11 @@ public void testConv2dBasic(){ .isSameMode(false) .build(); - SDVariable out = sd.conv2d(vars, c); + SDVariable conv = sd.conv2d(vars, c); + + SDVariable out = sd.tanh("tanh", conv); +// SDVariable out = conv.add("out", 1.0); +// SDVariable out = sd.sum(conv, 1,2,3); INDArray outArr = sd.execAndEndResult(); //Expected output size: out = (in - k + 2*p)/s + 1 = (28-2+0)/1+1 = 27 diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffOutput.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffOutput.java index bd85ca953f45..84a47c7cadbb 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffOutput.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffOutput.java @@ -19,6 +19,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.LossFunctions; +import java.util.Arrays; import java.util.Map; import static org.junit.Assert.*; @@ -29,6 +30,7 @@ public class TestSameDiffOutput { @Test public void testSameDiffOutputBasic() { + int minibatch = 3; int nIn = 3; int nOut = 4; @@ -50,18 +52,113 @@ public void testSameDiffOutputBasic() { assertArrayEquals(new int[]{nIn, nOut}, pt1.get(DefaultParamInitializer.WEIGHT_KEY).shape()); assertArrayEquals(new int[]{1, nOut}, pt1.get(DefaultParamInitializer.BIAS_KEY).shape()); - INDArray in = Nd4j.create(3, nIn); + INDArray in = Nd4j.create(minibatch, nIn); INDArray out = net.output(in); - assertArrayEquals(new int[]{3, nOut}, out.shape()); + assertArrayEquals(new int[]{minibatch, nOut}, out.shape()); } @Test - public void testPlaceholderReduceSimple(){ + public void test(){ SameDiff sd = SameDiff.create(); - SDVariable v = sd.var("in", new int[]{-1,10}); - SDVariable vSum = sd.sum(v, 1); + int nIn = 3; + int nOut = 4; + int minibatch = 3; + SDVariable input = sd.var("in", new int[]{-1,nIn}); + SDVariable label = sd.var("label", new 
int[]{-1, nOut}); + SDVariable weights = sd.var("W", new int[]{nIn,nOut}); + SDVariable bias = sd.var("b", new int[]{1,nOut}); + + + SDVariable mmul = sd.mmul("mmul", input, weights); + SDVariable z = mmul.add("z", bias); + SDVariable out = sd.tanh(z); + + SDVariable diff = out.sub(label); + SDVariable sqDiff = diff.mul(diff); + SDVariable msePerEx = sd.mean("msePerEx", sqDiff, 1); + SDVariable avgMSE = sd.mean("loss", msePerEx, 0); + + INDArray inputArr = Nd4j.rand(minibatch, nIn); + INDArray labelArr = Nd4j.rand(minibatch, nOut); + INDArray weightsArr = Nd4j.rand(nIn, nOut); + INDArray biasArr = Nd4j.rand(1,nOut); + + sd.associateArrayWithVariable(inputArr, input); + sd.associateArrayWithVariable(labelArr, label); + sd.associateArrayWithVariable(weightsArr, weights); + sd.associateArrayWithVariable(biasArr, bias); + + INDArray result = sd.execAndEndResult(); + } + + @Test + public void testPlaceholderReduceSimple() { + SameDiff sd = SameDiff.create(); + SDVariable v = sd.var("in", new int[]{-1, 10}); + SDVariable vSum = sd.sum(v, 1); //Exception here + } + + @Test + public void testSequentialMeans() { + SameDiff sd = SameDiff.create(); + SDVariable in = sd.var("in", new int[]{10, 10, 10}); + SDVariable mean1 = sd.mean(in, 2); //[10,10] out + SDVariable mean2 = sd.mean(mean1, 1); //[10,1] out + + System.out.println(sd.asFlatPrint()); + } + + @Test + public void testSequentialMeansPlaceholder() { + for( int dim0 : new int[]{10, -1}){ + String msg = "Dimension 0 = " + dim0; + System.out.println(msg); + SameDiff sd = SameDiff.create(); + SDVariable in = sd.var("in", new int[]{dim0, 9, 8}); + SDVariable mean1 = sd.mean(in, 2); //[10,9,8] -> [10,9] + SDVariable mean2 = sd.mean(mean1, 1); //[10,9] -> [10,1] + + INDArray inArr = Nd4j.create(10, 9, 8); + sd.associateArrayWithVariable(inArr, in); + + INDArray out = sd.execAndEndResult(); + + assertArrayEquals(msg, new int[]{10,1}, out.shape()); + } + } + + @Test + public void testReductionShapes1() { + + SameDiff sd = SameDiff.create(); + SDVariable in = sd.var("in", new int[]{10, 9, 8}); + SDVariable mean1 = sd.mean(in, 2); //[10,9] out + SDVariable mean2 = sd.mean(mean1, 1); //[10,1] out + sd.execAndEndResult(); //***Exception*** + + INDArray m1 = mean1.getArr(); + INDArray m2 = mean2.getArr(); + + assertArrayEquals(new int[]{10, 9}, m1.shape()); + assertArrayEquals(new int[]{10, 1}, m2.shape()); + } + + @Test + public void testReductionShapes2() { + + SameDiff sd2 = SameDiff.create(); + SDVariable in2 = sd2.var("in", new int[]{10, 9, 8}); + SDVariable meanA = sd2.mean(in2, 0); //[9,8] out + SDVariable meanB = sd2.mean(meanA, 0); //[1,8] out + sd2.execAndEndResult(); //***Exception*** + + INDArray mA = meanA.getArr(); + INDArray mB = meanB.getArr(); + + assertArrayEquals(new int[]{9, 8}, mA.shape()); + assertArrayEquals(new int[]{1, 8}, mB.shape()); } @Test @@ -94,7 +191,7 @@ public void testSameDiffOutputForward() { // Activation.SIGMOID //MSLE }; - for( int i=0; i Date: Tue, 2 Jan 2018 20:56:27 +1100 Subject: [PATCH 20/34] Fixes and improved tests --- .../samediff/TestSameDiffConv.java | 101 +++++++++--------- .../samediff/testlayers/SameDiffConv.java | 10 +- .../nn/layers/samediff/SameDiffLayer.java | 2 +- 3 files changed, 57 insertions(+), 56 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java index d090e6805dd0..13f17fcb8a4d 100644 --- 
a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java @@ -5,6 +5,7 @@ import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -18,14 +19,19 @@ import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp; import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv2DConfig; +import org.nd4j.linalg.convolution.Convolution; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.Arrays; import java.util.Map; +import java.util.Random; import static org.junit.Assert.*; +import static org.nd4j.linalg.indexing.NDArrayIndex.all; +import static org.nd4j.linalg.indexing.NDArrayIndex.point; @Slf4j public class TestSameDiffConv { @@ -61,16 +67,16 @@ public void testSameDiffConvBasic() { @Test public void testSameDiffConvForward_Debug() { - int imgH = 16; - int imgW = 24; + int imgH = 3; + int imgW = 3; int count = 0; - int minibatch = 5; - boolean hasBias = true; - int nIn = 3; - int nOut = 4; + int minibatch = 1; + boolean hasBias = false; + int nIn = 1; + int nOut = 1; int[] kernel = {2, 2}; int[] strides = {1, 1}; - int[] dilation = {1, 1}; + int[] dilation = {2, 1}; ConvolutionMode cm = ConvolutionMode.Truncate; Activation a = Activation.TANH; @@ -101,6 +107,7 @@ public void testSameDiffConvForward_Debug() { .kernelSize(kernel) .stride(strides) .dilation(dilation) +// .dilation(new int[]{dilation[1], dilation[0]}) .convolutionMode(cm) .activation(a) .hasBias(hasBias) @@ -134,10 +141,16 @@ public void testSameDiffConvForward_Debug() { @Test public void testSameDiffConvForward() { - int imgH = 8; - int imgW = 12; + int imgH = 16; + int imgW = 20; int count = 0; + + //Note: to avoid the exporential number of tests here, we'll randomly run every Nth test only. + //With n=1, m=3 this is 1 out of every 3 tests (on average) + Random r = new Random(12345); + int n = 1; + int m = 3; for (int minibatch : new int[]{5, 1}) { Activation[] afns = new Activation[]{ @@ -160,6 +173,12 @@ public void testSameDiffConvForward() { for (int[] dilation : new int[][]{{1, 1}, {2, 2}, {1, 2}}) { for (ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same}) { for (Activation a : afns) { + int i = r.nextInt(m); + if (i >= n) { + //Example: n=2, m=3... 
skip on i=2, run test on i=0, i=1 + continue; + } + String msg = "Test " + (count++) + " - minibatch=" + minibatch + ", nIn=" + nIn + ", nOut=" + nOut + ", kernel=" + Arrays.toString(kernel) + ", stride=" + Arrays.toString(strides) + ", dilation=" + Arrays.toString(dilation) @@ -178,6 +197,16 @@ public void testSameDiffConvForward() { .activation(a) .hasBias(hasBias) .build()) + .layer(new SameDiffConv.Builder() + .nIn(nOut) + .nOut(nOut) + .kernelSize(kernel) + .stride(strides) + .dilation(dilation) + .convolutionMode(cm) + .activation(a) + .hasBias(hasBias) + .build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -197,6 +226,16 @@ public void testSameDiffConvForward() { .activation(a) .hasBias(hasBias) .build()) + .layer(new ConvolutionLayer.Builder() + .nIn(nOut) + .nOut(nOut) + .kernelSize(kernel) + .stride(strides) + .dilation(dilation) + .convolutionMode(cm) + .activation(a) + .hasBias(hasBias) + .build()) .build(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); @@ -231,48 +270,4 @@ public void testSameDiffConvForward() { } } } - - @Test - public void testConv2dBasic() { - int nIn = 3; - int nOut = 4; - int kH = 2; - int kW = 2; - - int mb = 3; - int imgH = 28; - int imgW = 28; - - SameDiff sd = SameDiff.create(); - INDArray wArr = Nd4j.create(nOut, nIn, kH, kW); //As per DL4J - INDArray bArr = Nd4j.create(1, nOut); - INDArray inArr = Nd4j.create(mb, nIn, imgH, imgW); - - SDVariable in = sd.var("in", inArr); - SDVariable w = sd.var("W", wArr); - SDVariable b = sd.var("b", bArr); - - //Order: https://github.com/deeplearning4j/libnd4j/blob/6c41ea5528bb1f454e92a9da971de87b93ff521f/include/ops/declarable/generic/convo/conv2d.cpp#L20-L22 - //in, w, b - bias is optional - SDVariable[] vars = new SDVariable[]{in, w, b}; - - Conv2DConfig c = Conv2DConfig.builder() - .kh(kH).kw(kW) - .ph(0).pw(0) - .sy(1).sx(1) - .dh(1).dw(1) - .isSameMode(false) - .build(); - - SDVariable conv = sd.conv2d(vars, c); - - SDVariable out = sd.tanh("tanh", conv); -// SDVariable out = conv.add("out", 1.0); -// SDVariable out = sd.sum(conv, 1,2,3); - - INDArray outArr = sd.execAndEndResult(); - //Expected output size: out = (in - k + 2*p)/s + 1 = (28-2+0)/1+1 = 27 - int[] outShape = outArr.shape(); - assertArrayEquals(new int[]{mb, nOut, 27, 27}, outShape); - } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java index 5be9895cbfde..a92eee48c39b 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java @@ -127,15 +127,21 @@ public Map paramShapes() { public List defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable) { SDVariable w = paramTable.get(ConvolutionParamInitializer.WEIGHT_KEY); - SDVariable b = paramTable.get(ConvolutionParamInitializer.BIAS_KEY); - SDVariable[] vars = new SDVariable[]{layerInput, w, b}; + SDVariable[] vars; + if(hasBias){ + SDVariable b = paramTable.get(ConvolutionParamInitializer.BIAS_KEY); + vars = new SDVariable[]{layerInput, w, b}; + } else { + vars = new SDVariable[]{layerInput, w}; + } Conv2DConfig c = Conv2DConfig.builder() .kh(kernel[0]).kw(kernel[1]) .ph(padding[0]).pw(padding[1]) .sy(stride[0]).sx(stride[1]) .dh(dilation[0]).dw(dilation[1]) + .isSameMode(this.cm == ConvolutionMode.Same) .build(); SDVariable conv = sameDiff.conv2d(vars, c); //TODO 
can't set name diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java index c5db84cb7964..9ed214799acd 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java @@ -132,7 +132,7 @@ public INDArray params() { @Override public INDArray getParam(String param) { - throw new UnsupportedOperationException("Not supported"); + return paramTable.get(param); } @Override From c50f4024dfd524f51630828937fcbde4575dcb5d Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 3 Jan 2018 14:12:04 +1100 Subject: [PATCH 21/34] First pass on SameDiff loss layer --- .../samediff/TestSameDiffConv.java | 124 +++++ .../samediff/TestSameDiffLoss.java | 229 +++++++++ .../samediff/testlayers/SameDiffLoss.java | 82 ++++ .../samediff/BaseSameDiffLossLayer.java | 41 ++ .../layers/samediff/NoParamSameDiffLayer.java | 92 ++++ .../nn/layers/samediff/SameDiffLossLayer.java | 453 ++++++++++++++++++ .../layers/samediff/SameDiffOutputLayer.java | 10 +- 7 files changed, 1027 insertions(+), 4 deletions(-) create mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java create mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLossLayer.java create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/NoParamSameDiffLayer.java create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java index 13f17fcb8a4d..5cfa3660590e 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java @@ -270,4 +270,128 @@ public void testSameDiffConvForward() { } } } + + + @Test + public void testConv2dEdgeCase(){ + Nd4j.getExecutioner().enableVerboseMode(true); + Nd4j.getExecutioner().enableDebugMode(true); + + INDArray in = Nd4j.create(1,1,1,4).assign(Nd4j.linspace(1,4,4)).muli(10); //NCHW + INDArray wArr = Nd4j.create(4,1,1,1); //dOut, dIt, kH, kW + wArr.get(all(), point(0), point(0), point(0)).assign(Nd4j.linspace(1,4,4)).addi(0.5); + + SameDiff sd = SameDiff.create(); + SDVariable i = sd.var("in", in); + SDVariable w = sd.var("w", wArr); + + Conv2DConfig conf = Conv2DConfig.builder() + .isSameMode(false) + .kw(1) + .kh(1) + .dh(1) + .dw(1) + .sy(1) + .sx(1) + .ph(0) + .pw(0) + .build(); + + SDVariable conv2d = sd.conv2d(new SDVariable[]{i,w}, conf); + + INDArray out = sd.execAndEndResult(); + + + //1x1 conv edge case: equivalent to linear op for each position. 
Also: depth 1 in allows us to use concat + mul here + INDArray wVec = wArr.get( all(), point(0), point(0), point(0)); + INDArray exp = Nd4j.concat(1, in, in, in, in); + Nd4j.getExecutioner().exec(new BroadcastMulOp(exp, wVec, exp, 1)); + + for(int j=0; j<4; j++ ){ + System.out.println(exp.get(point(0), point(j), all(), all()) + "\t" + out.get(point(0), point(j), all(), all())); + } + + assertEquals(exp, out); + } + + + @Test + public void testConv2dEdgeCase2(){ + + INDArray in = Nd4j.create(1,1,1,4).assign(Nd4j.linspace(1,4,4)).muli(10); //NCHW + INDArray wArr = Nd4j.create(3,1,1,1); //dOut, dIt, kH, kW + wArr.get(all(), point(0), point(0), point(0)).assign(Nd4j.linspace(1,3,3)).addi(0.5); + + SameDiff sd = SameDiff.create(); + SDVariable i = sd.var("in", in); + SDVariable w = sd.var("w", wArr); + + Conv2DConfig conf = Conv2DConfig.builder() + .isSameMode(false) + .kw(1) + .kh(1) + .dh(1) + .dw(1) + .sy(1) + .sx(1) + .ph(0) + .pw(0) + .build(); + + SDVariable conv2d = sd.conv2d(new SDVariable[]{i,w}, conf); + + INDArray out = sd.execAndEndResult(); + + + //1x1 conv edge case: equivalent to linear op for each position. Also: depth 1 in allows us to use concat + mul here + INDArray wVec = wArr.get( all(), point(0), point(0), point(0)); + INDArray exp = Nd4j.concat(1, in, in, in); + Nd4j.getExecutioner().exec(new BroadcastMulOp(exp, wVec, exp, 1)); + + for(int j=0; j<3; j++ ){ + System.out.println(exp.get(point(0), point(j), all(), all()) + "\t" + out.get(point(0), point(j), all(), all())); + } + + assertEquals(exp, out); + } + + @Test + public void testConv2dEdgeCase3(){ + + INDArray in = Nd4j.create(1,1,1,3).assign(Nd4j.linspace(1,3,3)).muli(10); //NCHW + INDArray wArr = Nd4j.create(4,1,1,1); //dOut, dIt, kH, kW + wArr.get(all(), point(0), point(0), point(0)).assign(Nd4j.linspace(1,4,4)).addi(0.5); + + SameDiff sd = SameDiff.create(); + SDVariable i = sd.var("in", in); + SDVariable w = sd.var("w", wArr); + + Conv2DConfig conf = Conv2DConfig.builder() + .isSameMode(false) + .kw(1) + .kh(1) + .dh(1) + .dw(1) + .sy(1) + .sx(1) + .ph(0) + .pw(0) + .build(); + + SDVariable conv2d = sd.conv2d(new SDVariable[]{i,w}, conf); + + INDArray out = sd.execAndEndResult(); + + + //1x1 conv edge case: equivalent to linear op for each position. 
Also: depth 1 in allows us to use concat + mul here + INDArray wVec = wArr.get( all(), point(0), point(0), point(0)); + INDArray exp = Nd4j.concat(1, in, in, in, in); + Nd4j.getExecutioner().exec(new BroadcastMulOp(exp, wVec, exp, 1)); + + for(int j=0; j<4; j++ ){ + System.out.println(exp.get(point(0), point(j), all(), all()) + "\t" + out.get(point(0), point(j), all(), all())); + } + + assertEquals(exp, out); + } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java new file mode 100644 index 000000000000..cdca6e5330c5 --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java @@ -0,0 +1,229 @@ +package org.deeplearning4j.samediff; + +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.TestUtils; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.nn.params.DefaultParamInitializer; +import org.deeplearning4j.samediff.testlayers.SameDiffLoss; +import org.deeplearning4j.samediff.testlayers.SameDiffOutput; +import org.junit.Test; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.lossfunctions.LossFunctions; + +import java.util.Map; + +import static org.junit.Assert.*; + +@Slf4j +public class TestSameDiffLoss { + + @Test + public void testSameDiffLossBasic() { + + int minibatch = 3; + int nIn = 3; + int nOut = 4; + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new DenseLayer.Builder().nIn(3).nOut(4).activation(Activation.TANH).build()) + .layer(new SameDiffLoss.Builder().lossFunction(LossFunctions.LossFunction.MSE).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + INDArray in = Nd4j.create(minibatch, nIn); + INDArray out = net.output(in); + assertEquals(in, out); + } + + @Test + public void test(){ + + SameDiff sd = SameDiff.create(); + + int nIn = 3; + int nOut = 4; + int minibatch = 3; + SDVariable input = sd.var("in", new int[]{-1,nIn}); + SDVariable label = sd.var("label", new int[]{-1, nOut}); + SDVariable weights = sd.var("W", new int[]{nIn,nOut}); + SDVariable bias = sd.var("b", new int[]{1,nOut}); + + + SDVariable mmul = sd.mmul("mmul", input, weights); + SDVariable z = mmul.add("z", bias); + SDVariable out = sd.tanh(z); + + SDVariable diff = out.sub(label); + SDVariable sqDiff = diff.mul(diff); + SDVariable msePerEx = sd.mean("msePerEx", sqDiff, 1); + SDVariable avgMSE = sd.mean("loss", msePerEx, 0); + + INDArray inputArr = Nd4j.rand(minibatch, nIn); + INDArray labelArr = Nd4j.rand(minibatch, nOut); + INDArray weightsArr = Nd4j.rand(nIn, nOut); + INDArray biasArr = Nd4j.rand(1,nOut); + + sd.associateArrayWithVariable(inputArr, input); + sd.associateArrayWithVariable(labelArr, label); + sd.associateArrayWithVariable(weightsArr, weights); + sd.associateArrayWithVariable(biasArr, bias); + + INDArray result = sd.execAndEndResult(); + } + + @Test + public void testPlaceholderReduceSimple() { + SameDiff sd = SameDiff.create(); + SDVariable v = sd.var("in", new int[]{-1, 10}); + SDVariable vSum = sd.sum(v, 1); //Exception 
here + } + + @Test + public void testSequentialMeans() { + SameDiff sd = SameDiff.create(); + SDVariable in = sd.var("in", new int[]{10, 10, 10}); + SDVariable mean1 = sd.mean(in, 2); //[10,10] out + SDVariable mean2 = sd.mean(mean1, 1); //[10,1] out + + System.out.println(sd.asFlatPrint()); + } + + @Test + public void testSequentialMeansPlaceholder() { + for( int dim0 : new int[]{10, -1}){ + String msg = "Dimension 0 = " + dim0; + System.out.println(msg); + SameDiff sd = SameDiff.create(); + SDVariable in = sd.var("in", new int[]{dim0, 9, 8}); + SDVariable mean1 = sd.mean(in, 2); //[10,9,8] -> [10,9] + SDVariable mean2 = sd.mean(mean1, 1); //[10,9] -> [10,1] + + INDArray inArr = Nd4j.create(10, 9, 8); + sd.associateArrayWithVariable(inArr, in); + + INDArray out = sd.execAndEndResult(); + + assertArrayEquals(msg, new int[]{10,1}, out.shape()); + } + } + + @Test + public void testReductionShapes1() { + + SameDiff sd = SameDiff.create(); + SDVariable in = sd.var("in", new int[]{10, 9, 8}); + SDVariable mean1 = sd.mean(in, 2); //[10,9] out + SDVariable mean2 = sd.mean(mean1, 1); //[10,1] out + sd.execAndEndResult(); //***Exception*** + + INDArray m1 = mean1.getArr(); + INDArray m2 = mean2.getArr(); + + assertArrayEquals(new int[]{10, 9}, m1.shape()); + assertArrayEquals(new int[]{10, 1}, m2.shape()); + } + + @Test + public void testReductionShapes2() { + + SameDiff sd2 = SameDiff.create(); + SDVariable in2 = sd2.var("in", new int[]{10, 9, 8}); + SDVariable meanA = sd2.mean(in2, 0); //[9,8] out + SDVariable meanB = sd2.mean(meanA, 0); //[1,8] out + sd2.execAndEndResult(); //***Exception*** + + INDArray mA = meanA.getArr(); + INDArray mB = meanB.getArr(); + + assertArrayEquals(new int[]{9, 8}, mA.shape()); + assertArrayEquals(new int[]{1, 8}, mB.shape()); + } + + @Test + public void testSameDiffOutputForward() { + + for (int minibatch : new int[]{5, 1}) { + int nIn = 3; + int nOut = 4; + + LossFunctions.LossFunction[] lossFns = new LossFunctions.LossFunction[]{ + LossFunctions.LossFunction.MSE, +// LossFunctions.LossFunction.MCXENT, +// LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, +// LossFunctions.LossFunction.L2, +// LossFunctions.LossFunction.SQUARED_LOSS, +// LossFunctions.LossFunction.KL_DIVERGENCE, +// LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR, +// LossFunctions.LossFunction.XENT, +// LossFunctions.LossFunction.MEAN_SQUARED_LOGARITHMIC_ERROR + }; + + Activation[] afns = new Activation[]{ + Activation.TANH, //MSE +// Activation.SOFTMAX, //MCXENT +// Activation.SOFTMAX, //NLL +// Activation.SOFTPLUS, //L2 +// Activation.TANH, //Squared loss +// Activation.SIGMOID, //KLD +// Activation.TANH, //Squared loss +// Activation.SIGMOID //MSLE + }; + + for (int i = 0; i < lossFns.length; i++) { + LossFunctions.LossFunction lf = lossFns[i]; + Activation a = afns[i]; + log.info("Starting test - " + lf + ", " + a); + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new SameDiffOutput.Builder().nIn(nIn).nOut(nOut) + .lossFunction(lf) + .activation(a) + .build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + assertNotNull(net.paramTable()); + + MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + .list() + .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) + .build(); + + MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); + net2.init(); + + net.params().assign(net2.params()); + + //Check params: + assertEquals(net2.params(), net.params()); + Map params1 = 
net.paramTable(); + Map params2 = net2.paramTable(); + assertEquals(params2, params1); + + INDArray in = Nd4j.rand(minibatch, nIn); + INDArray out = net.output(in); + INDArray outExp = net2.output(in); + + assertEquals(outExp, out); + + //Also check serialization: + MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net); + INDArray outLoaded = netLoaded.output(in); + + assertEquals(outExp, outLoaded); + } + } + } +} diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java new file mode 100644 index 000000000000..1dffc0ec237a --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java @@ -0,0 +1,82 @@ +package org.deeplearning4j.samediff.testlayers; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLossLayer; +import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffOutputLayer; +import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils; +import org.deeplearning4j.nn.params.DefaultParamInitializer; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.linalg.primitives.Pair; +import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; + +import java.util.*; + +@Data +@EqualsAndHashCode(callSuper = true, exclude = {"paramShapes"}) +@JsonIgnoreProperties("paramShapes") +public class SameDiffLoss extends BaseSameDiffLossLayer { + + private LossFunctions.LossFunction lossFn; + + protected SameDiffLoss(Builder builder) { + super(builder); + } + + private SameDiffLoss() { + //No op constructor for Jackson + } + + public Pair lossKeys() { + return new Pair<>("lossPerEx", "score"); + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return null; + } + + @Override + public void defineLayer(SameDiff sd, SDVariable input, SDVariable label) { + + Pair lossKeys = lossKeys(); + SDVariable loss; + + switch (lossFn) { + case MSE: + SDVariable diff = input.sub(label); + SDVariable sqDiff = diff.mul(diff); + SDVariable mse = sd.mean(lossKeys.getFirst(), sqDiff, 1); + SDVariable score = sd.mean(lossKeys.getSecond(), mse); + break; + default: + throw new UnsupportedOperationException("Not yet implemented: " + lossFn); + } + } + + @Override + public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) { + + } + + public static class Builder extends BaseSameDiffLossLayer.Builder { + + private LossFunctions.LossFunction lossFn = LossFunctions.LossFunction.MSE; + + public Builder lossFunction(LossFunctions.LossFunction lossFn) { + this.lossFn = lossFn; + return this; + } + + @Override + public SameDiffLoss build() { + return new SameDiffLoss(this); + } + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLossLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLossLayer.java new file mode 100644 index 000000000000..e5bcada3c439 --- /dev/null +++ 
b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLossLayer.java @@ -0,0 +1,41 @@ +package org.deeplearning4j.nn.conf.layers.samediff; + +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.layers.samediff.SameDiffLossLayer; +import org.deeplearning4j.optimize.api.IterationListener; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.api.ndarray.INDArray; + +import java.util.Collection; +import java.util.Map; + +public abstract class BaseSameDiffLossLayer extends NoParamSameDiffLayer { + + protected BaseSameDiffLossLayer(Builder builder){ + super(builder); + } + + protected BaseSameDiffLossLayer(){ + //No arg for Jackson/JSON + } + + public abstract void defineLayer(SameDiff sameDiff, SDVariable layerInput, SDVariable label); + + @Override + public Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams) { + SameDiffLossLayer ret = new SameDiffLossLayer(conf); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setConf(conf); + return ret; + } + + public static abstract class Builder> extends NoParamSameDiffLayer.Builder { + + + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/NoParamSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/NoParamSameDiffLayer.java new file mode 100644 index 000000000000..4b0bffc60cb9 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/NoParamSameDiffLayer.java @@ -0,0 +1,92 @@ +package org.deeplearning4j.nn.conf.layers.samediff; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.params.EmptyParamInitializer; +import org.deeplearning4j.nn.params.SameDiffParamInitializer; +import org.deeplearning4j.optimize.api.IterationListener; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.learning.config.IUpdater; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +@Data +@EqualsAndHashCode(callSuper = true) +public abstract class NoParamSameDiffLayer extends Layer { + + protected NoParamSameDiffLayer(Builder builder){ + super(builder); + } + + protected NoParamSameDiffLayer(){ + //No op constructor for Jackson + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType){ + return inputType; + } + + @Override + public void setNIn(InputType inputType, boolean override){ + //No op + } + + @Override + public abstract InputPreProcessor getPreProcessorForInputType(InputType inputType); + + public abstract void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig); + + @Override + public abstract org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, + int layerIndex, INDArray layerParamsView, boolean initializeParams); + + 
//================================================================================================================== + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + @Override + public double getL1ByParam(String paramName) { + return 0.0; //No params + } + + @Override + public double getL2ByParam(String paramName) { + return 0.0; //No params + } + + @Override + public IUpdater getUpdaterByParam(String paramName){ + throw new UnsupportedOperationException("No parameters for this layer"); + } + + @Override + public boolean isPretrainParam(String paramName) { + return false; + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + return new LayerMemoryReport(); //TODO + } + + public void applyGlobalConfig(NeuralNetConfiguration.Builder b){ + applyGlobalConfigToLayer(b); + } + + public static abstract class Builder> extends Layer.Builder { + + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java new file mode 100644 index 000000000000..ca0a15fe30b9 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java @@ -0,0 +1,453 @@ +package org.deeplearning4j.nn.layers.samediff; + +import lombok.Getter; +import lombok.Setter; +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.api.MaskState; +import org.deeplearning4j.nn.api.layers.IOutputLayer; +import org.deeplearning4j.nn.conf.CacheMode; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLossLayer; +import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffOutputLayer; +import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.optimize.api.ConvexOptimizer; +import org.deeplearning4j.optimize.api.IterationListener; +import org.nd4j.autodiff.functions.DifferentialFunction; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.dataset.api.DataSet; +import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.primitives.Pair; + +import java.util.*; + +public class SameDiffLossLayer implements IOutputLayer { + + public static final String LABEL_KEY = "label"; + + protected NeuralNetConfiguration conf; + @Getter @Setter protected int index; + @Getter @Setter protected INDArray input; + @Getter @Setter private INDArray labels; + protected double score; + + @Getter @Setter protected int iterationCount; + @Getter @Setter protected int epochCount; + + protected SameDiff sameDiff; + protected SDVariable inVar; + protected SDVariable labelVar; + + + + public SameDiffLossLayer(NeuralNetConfiguration conf) { + this.conf = conf; + } + + @Override + public double computeScore(double fullNetworkL1, double fullNetworkL2, boolean training) { + throw new UnsupportedOperationException("Not yet implemented"); + } + + @Override + public INDArray computeScoreForExamples(double fullNetworkL1, double fullNetworkL2) { + throw new UnsupportedOperationException("Not yet implemented"); + } + + + @Override + public double score() { + return score; + } + + @Override + public void computeGradientAndScore() { + Pair p = backpropGradient(null); + + sameDiff.associateArrayWithVariable(input, inVar); + sameDiff.associateArrayWithVariable(labels, 
labelVar); + + INDArray out = sameDiff.execAndEndResult(); + if(out.length() != 1){ + throw new IllegalStateException("Expected scalar score: got array with shape " + Arrays.toString(out.shape())); + } + + score = out.getDouble(0); + } + + @Override + public Pair backpropGradient(INDArray epsilon) { + if(input == null){ + throw new IllegalStateException("Cannot compute gradient without input (input is null)"); + } + if(labels == null){ + throw new IllegalStateException("Cannot compute gradient without labels (labels are null)"); + } + if(sameDiff == null){ + doInit(); + } + + Pair,List> p = sameDiff.execBackwards(); + + return null; + } + + + protected void doInit(){ + SameDiff sd = SameDiff.create(); + SDVariable in = sd.var("input", input); + SDVariable label = sd.var("label", labels); + + BaseSameDiffLossLayer l = (BaseSameDiffLossLayer) conf.getLayer(); + l.defineLayer(sd, in, label); + + this.sameDiff = sd; + this.inVar = in; + this.labelVar = label; + } + + + //-------------------------------------------------------------------------------------------------------- + + @Override + public double f1Score(DataSet data) { + throw new UnsupportedOperationException(); + } + + @Override + public double f1Score(INDArray examples, INDArray labels) { + throw new UnsupportedOperationException(); + } + + @Override + public int numLabels() { + throw new UnsupportedOperationException(); + } + + @Override + public void fit(DataSetIterator iter) { + throw new UnsupportedOperationException(); + } + + @Override + public int[] predict(INDArray examples) { + throw new UnsupportedOperationException(); + } + + @Override + public List predict(DataSet dataSet) { + throw new UnsupportedOperationException(); + } + + @Override + public INDArray labelProbabilities(INDArray examples) { + throw new UnsupportedOperationException(); + } + + @Override + public void fit(INDArray examples, INDArray labels) { + throw new UnsupportedOperationException(); + } + + @Override + public void fit(DataSet data) { + throw new UnsupportedOperationException(); + } + + @Override + public void fit(INDArray examples, int[] labels) { + throw new UnsupportedOperationException(); + } + + + @Override + public void setCacheMode(CacheMode mode) { + //No op + } + + @Override + public double calcL2(boolean backpropOnlyParams) { + return 0; //No params + } + + @Override + public double calcL1(boolean backpropOnlyParams) { + return 0; //No params + } + + @Override + public Type type() { + return Type.FEED_FORWARD; + } + + @Override + public INDArray preOutput(INDArray x) { + return x; + } + + @Override + public INDArray preOutput(INDArray x, TrainingMode training) { + return x; + } + + @Override + public INDArray activate(TrainingMode training) { + return input; + } + + @Override + public INDArray activate(INDArray input, TrainingMode training) { + return input; + } + + @Override + public INDArray preOutput(INDArray x, boolean training) { + return x; + } + + @Override + public INDArray activate(boolean training) { + return input; + } + + @Override + public INDArray activate(INDArray input, boolean training) { + return input; + } + + @Override + public INDArray activate() { + return input; + } + + @Override + public INDArray activate(INDArray input) { + return input; + } + + @Override + public Layer transpose() { + throw new UnsupportedOperationException(); + } + + @Override + public Layer clone() { + throw new UnsupportedOperationException(); + } + + @Override + public Collection getListeners() { + return null; + } + + @Override + public void 
setListeners(IterationListener... listeners) { + //No op + } + + @Override + public void addListeners(IterationListener... listener) { + //No op + } + + @Override + public void fit() { + throw new UnsupportedOperationException("Cannot fit SameDiffLossLayer"); + } + + @Override + public void update(Gradient gradient) { + throw new UnsupportedOperationException("Not supported (no parameters)"); + } + + @Override + public void update(INDArray gradient, String paramType) { + throw new UnsupportedOperationException("Not supported (no parameters)"); + } + + @Override + public void accumulateScore(double accum) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public INDArray params() { + return null; + } + + @Override + public int numParams() { + return 0; + } + + @Override + public int numParams(boolean backwards) { + return 0; + } + + @Override + public void setParams(INDArray params) { + throw new UnsupportedOperationException("Not supported (no parameters)"); + } + + @Override + public void setParamsViewArray(INDArray params) { + if(params != null) { + throw new UnsupportedOperationException("Not supported (no parameters)"); + } + } + + @Override + public INDArray getGradientsViewArray() { + throw new UnsupportedOperationException("Not supported (no parameters)"); + } + + @Override + public void setBackpropGradientsViewArray(INDArray gradients) { + throw new UnsupportedOperationException("Not supported (no parameters)"); + } + + @Override + public void fit(INDArray data) { + throw new UnsupportedOperationException("Cannot fit SameDiffLossLayer"); + } + + @Override + public void iterate(INDArray input) { + throw new UnsupportedOperationException("Cannot fit SameDiffLossLayer"); + } + + @Override + public Gradient gradient() { + return null; //No parameters -> no gradient + } + + @Override + public Pair gradientAndScore() { + return new Pair<>(gradient(), score()); + } + + @Override + public int batchSize() { + return 0; + } + + @Override + public NeuralNetConfiguration conf() { + return conf; + } + + @Override + public void setConf(NeuralNetConfiguration conf) { + this.conf = conf; + } + + @Override + public INDArray input() { + return input; + } + + @Override + public void validateInput() { + //No op + } + + @Override + public ConvexOptimizer getOptimizer() { + throw new UnsupportedOperationException("Not supported (no parameters)"); + } + + @Override + public INDArray getParam(String param) { + throw new UnsupportedOperationException("Not supported (no parameters)"); + } + + @Override + public void initParams() { + throw new UnsupportedOperationException("Not supported (no parameters)"); + } + + @Override + public Map paramTable() { + return Collections.emptyMap(); + } + + @Override + public Map paramTable(boolean backpropParamsOnly) { + return paramTable(); + } + + @Override + public void setParamTable(Map paramTable) { + if(paramTable != null && paramTable.size() > 0) { + throw new UnsupportedOperationException("Not supported (no parameters)"); + } + } + + @Override + public void setParam(String key, INDArray val) { + throw new UnsupportedOperationException("Not supported (no parameters)"); + } + + @Override + public void clear() { + input = null; + labels = null; + } + + @Override + public void applyConstraints(int iteration, int epoch) { + //No op + } + + @Override + public void init() { + //No op + } + + @Override + public void setListeners(Collection listeners) { + //No op + } + + @Override + public void setInputMiniBatchSize(int size) { + + } + + 
@Override + public int getInputMiniBatchSize() { + return 0; + } + + @Override + public void setMaskArray(INDArray maskArray) { + if(maskArray != null) { + throw new UnsupportedOperationException("Mask arrays: not yet supported for SameDiffLossLayer"); + } + } + + @Override + public INDArray getMaskArray() { + return null; + } + + @Override + public boolean isPretrainLayer() { + return false; + } + + @Override + public void clearNoiseWeightParams() { + //No op + } + + @Override + public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + if(maskArray != null){ + throw new UnsupportedOperationException("Mask arrays: not yet supported for SameDiffLossLayer"); + } + return null; + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java index 60df946004a2..df8414163097 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java @@ -17,6 +17,8 @@ public class SameDiffOutputLayer extends SameDiffLayer implements IOutputLayer { public static final String LABEL_KEY = "label"; + private INDArray labels; + public SameDiffOutputLayer(NeuralNetConfiguration conf) { @@ -25,22 +27,22 @@ public SameDiffOutputLayer(NeuralNetConfiguration conf) { @Override public void setLabels(INDArray labels) { - + this.labels = labels; } @Override public INDArray getLabels() { - return null; + return labels; } @Override public double computeScore(double fullNetworkL1, double fullNetworkL2, boolean training) { - return 0; + throw new UnsupportedOperationException("Not yet implemented"); } @Override public INDArray computeScoreForExamples(double fullNetworkL1, double fullNetworkL2) { - return null; + throw new UnsupportedOperationException("Not yet implemented"); } @Override From dbac4b8e10caed26341eb5df20246650150c30c8 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 3 Jan 2018 14:19:58 +1100 Subject: [PATCH 22/34] Fix, cleanup --- .../samediff/TestSameDiffLoss.java | 190 +----------------- .../samediff/testlayers/SameDiffLoss.java | 1 + 2 files changed, 8 insertions(+), 183 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java index cdca6e5330c5..ef9351118a96 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java @@ -40,190 +40,14 @@ public void testSameDiffLossBasic() { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - INDArray in = Nd4j.create(minibatch, nIn); + INDArray in = Nd4j.rand(minibatch, nIn); INDArray out = net.output(in); - assertEquals(in, out); - } - - @Test - public void test(){ - - SameDiff sd = SameDiff.create(); - - int nIn = 3; - int nOut = 4; - int minibatch = 3; - SDVariable input = sd.var("in", new int[]{-1,nIn}); - SDVariable label = sd.var("label", new int[]{-1, nOut}); - SDVariable weights = sd.var("W", new int[]{nIn,nOut}); - SDVariable bias = sd.var("b", new int[]{1,nOut}); - - - SDVariable mmul = sd.mmul("mmul", input, weights); - SDVariable z = mmul.add("z", bias); - SDVariable out = sd.tanh(z); - - SDVariable diff = out.sub(label); - SDVariable sqDiff = 
diff.mul(diff); - SDVariable msePerEx = sd.mean("msePerEx", sqDiff, 1); - SDVariable avgMSE = sd.mean("loss", msePerEx, 0); - - INDArray inputArr = Nd4j.rand(minibatch, nIn); - INDArray labelArr = Nd4j.rand(minibatch, nOut); - INDArray weightsArr = Nd4j.rand(nIn, nOut); - INDArray biasArr = Nd4j.rand(1,nOut); - - sd.associateArrayWithVariable(inputArr, input); - sd.associateArrayWithVariable(labelArr, label); - sd.associateArrayWithVariable(weightsArr, weights); - sd.associateArrayWithVariable(biasArr, bias); - - INDArray result = sd.execAndEndResult(); - } - - @Test - public void testPlaceholderReduceSimple() { - SameDiff sd = SameDiff.create(); - SDVariable v = sd.var("in", new int[]{-1, 10}); - SDVariable vSum = sd.sum(v, 1); //Exception here - } - - @Test - public void testSequentialMeans() { - SameDiff sd = SameDiff.create(); - SDVariable in = sd.var("in", new int[]{10, 10, 10}); - SDVariable mean1 = sd.mean(in, 2); //[10,10] out - SDVariable mean2 = sd.mean(mean1, 1); //[10,1] out - - System.out.println(sd.asFlatPrint()); - } - - @Test - public void testSequentialMeansPlaceholder() { - for( int dim0 : new int[]{10, -1}){ - String msg = "Dimension 0 = " + dim0; - System.out.println(msg); - SameDiff sd = SameDiff.create(); - SDVariable in = sd.var("in", new int[]{dim0, 9, 8}); - SDVariable mean1 = sd.mean(in, 2); //[10,9,8] -> [10,9] - SDVariable mean2 = sd.mean(mean1, 1); //[10,9] -> [10,1] - - INDArray inArr = Nd4j.create(10, 9, 8); - sd.associateArrayWithVariable(inArr, in); - - INDArray out = sd.execAndEndResult(); - - assertArrayEquals(msg, new int[]{10,1}, out.shape()); - } - } - - @Test - public void testReductionShapes1() { - - SameDiff sd = SameDiff.create(); - SDVariable in = sd.var("in", new int[]{10, 9, 8}); - SDVariable mean1 = sd.mean(in, 2); //[10,9] out - SDVariable mean2 = sd.mean(mean1, 1); //[10,1] out - sd.execAndEndResult(); //***Exception*** - - INDArray m1 = mean1.getArr(); - INDArray m2 = mean2.getArr(); - - assertArrayEquals(new int[]{10, 9}, m1.shape()); - assertArrayEquals(new int[]{10, 1}, m2.shape()); - } - - @Test - public void testReductionShapes2() { - - SameDiff sd2 = SameDiff.create(); - SDVariable in2 = sd2.var("in", new int[]{10, 9, 8}); - SDVariable meanA = sd2.mean(in2, 0); //[9,8] out - SDVariable meanB = sd2.mean(meanA, 0); //[1,8] out - sd2.execAndEndResult(); //***Exception*** - - INDArray mA = meanA.getArr(); - INDArray mB = meanB.getArr(); - - assertArrayEquals(new int[]{9, 8}, mA.shape()); - assertArrayEquals(new int[]{1, 8}, mB.shape()); - } - - @Test - public void testSameDiffOutputForward() { - - for (int minibatch : new int[]{5, 1}) { - int nIn = 3; - int nOut = 4; - - LossFunctions.LossFunction[] lossFns = new LossFunctions.LossFunction[]{ - LossFunctions.LossFunction.MSE, -// LossFunctions.LossFunction.MCXENT, -// LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, -// LossFunctions.LossFunction.L2, -// LossFunctions.LossFunction.SQUARED_LOSS, -// LossFunctions.LossFunction.KL_DIVERGENCE, -// LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR, -// LossFunctions.LossFunction.XENT, -// LossFunctions.LossFunction.MEAN_SQUARED_LOGARITHMIC_ERROR - }; - - Activation[] afns = new Activation[]{ - Activation.TANH, //MSE -// Activation.SOFTMAX, //MCXENT -// Activation.SOFTMAX, //NLL -// Activation.SOFTPLUS, //L2 -// Activation.TANH, //Squared loss -// Activation.SIGMOID, //KLD -// Activation.TANH, //Squared loss -// Activation.SIGMOID //MSLE - }; - - for (int i = 0; i < lossFns.length; i++) { - LossFunctions.LossFunction lf = lossFns[i]; - Activation 
a = afns[i]; - log.info("Starting test - " + lf + ", " + a); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() - .layer(new SameDiffOutput.Builder().nIn(nIn).nOut(nOut) - .lossFunction(lf) - .activation(a) - .build()) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - assertNotNull(net.paramTable()); - - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() - .list() - .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) - .build(); - - MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); - net2.init(); - - net.params().assign(net2.params()); - - //Check params: - assertEquals(net2.params(), net.params()); - Map params1 = net.paramTable(); - Map params2 = net2.paramTable(); - assertEquals(params2, params1); - - INDArray in = Nd4j.rand(minibatch, nIn); - INDArray out = net.output(in); - INDArray outExp = net2.output(in); - - assertEquals(outExp, out); - - //Also check serialization: - MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net); - INDArray outLoaded = netLoaded.output(in); + assertArrayEquals(new int[]{minibatch, nOut}, out.shape()); - assertEquals(outExp, outLoaded); - } - } + INDArray label = Nd4j.rand(minibatch, nOut); + net.setLabels(label); + net.computeGradientAndScore(); + double score = net.score(); + assertTrue(score > 0); } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java index 1dffc0ec237a..0b57956fd0bd 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java @@ -27,6 +27,7 @@ public class SameDiffLoss extends BaseSameDiffLossLayer { protected SameDiffLoss(Builder builder) { super(builder); + this.lossFn = builder.lossFn; } private SameDiffLoss() { From 29a15eea9d9d08d82d46d91b4d364485ebf4fcef Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 3 Jan 2018 23:31:17 +1100 Subject: [PATCH 23/34] SameDiff loss layer: fixes; score and forward passing for MSE --- .../samediff/TestSameDiffLoss.java | 92 +++++++++++++++++++ .../nn/layers/samediff/SameDiffLossLayer.java | 15 ++- 2 files changed, 105 insertions(+), 2 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java index ef9351118a96..12b7b39296ce 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java @@ -5,18 +5,22 @@ import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.LossLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.samediff.testlayers.SameDiffLoss; import org.deeplearning4j.samediff.testlayers.SameDiffOutput; import org.junit.Test; +import org.nd4j.autodiff.functions.DifferentialFunction; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; import 
org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.linalg.primitives.Pair; +import java.util.List; import java.util.Map; import static org.junit.Assert.*; @@ -50,4 +54,92 @@ public void testSameDiffLossBasic() { double score = net.score(); assertTrue(score > 0); } + + @Test + public void testSameDiffLossVsDl4j() { + + for (int minibatch : new int[]{5, 1}) { + int nIn = 3; + int nOut = 4; + + LossFunctions.LossFunction[] lossFns = new LossFunctions.LossFunction[]{ + LossFunctions.LossFunction.MSE, +// LossFunctions.LossFunction.MCXENT, +// LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, +// LossFunctions.LossFunction.L2, +// LossFunctions.LossFunction.SQUARED_LOSS, +// LossFunctions.LossFunction.KL_DIVERGENCE, +// LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR, +// LossFunctions.LossFunction.XENT, +// LossFunctions.LossFunction.MEAN_SQUARED_LOGARITHMIC_ERROR + }; + + for (int i = 0; i < lossFns.length; i++) { + LossFunctions.LossFunction lf = lossFns[i]; + log.info("Starting test - " + lf); + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.TANH).build()) + .layer(new SameDiffLoss.Builder() + .lossFunction(lf) + .build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + assertNotNull(net.paramTable()); + + MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + .list() + .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.TANH).build()) + .layer(new LossLayer.Builder() + .lossFunction(lf) + .activation(Activation.IDENTITY) + .build()) + .build(); + + MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); + net2.init(); + + net.params().assign(net2.params()); + + //Check params: + assertEquals(net2.params(), net.params()); + Map params1 = net.paramTable(); + Map params2 = net2.paramTable(); + assertEquals(params2, params1); + + INDArray in = Nd4j.rand(minibatch, nIn); + INDArray out = net.output(in); + INDArray outExp = net2.output(in); + + assertEquals(outExp, out); + + //Check scores: + INDArray label = Nd4j.rand(minibatch, nOut); + net.setLabels(label); + net2.setLabels(label); + + net.computeGradientAndScore(); + net2.computeGradientAndScore(); + + double scoreExp = net2.score(); + double scoreAct = net.score(); + assertTrue(scoreExp > 0); + assertEquals(scoreExp, scoreAct, 1e-6); + + INDArray gradExp = net2.getFlattenedGradients(); + INDArray gradAct = net.getFlattenedGradients(); + + assertEquals(gradExp, gradAct); + + //Also check serialization: + MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net); + INDArray outLoaded = netLoaded.output(in); + + assertEquals(outExp, outLoaded); + } + } + } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java index ca0a15fe30b9..22fc37fb7a0d 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java @@ -9,6 +9,7 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLossLayer; import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffOutputLayer; +import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; 
import org.deeplearning4j.optimize.api.ConvexOptimizer; import org.deeplearning4j.optimize.api.IterationListener; @@ -39,6 +40,10 @@ public class SameDiffLossLayer implements IOutputLayer { protected SDVariable inVar; protected SDVariable labelVar; + protected Gradient emptyGrad = new DefaultGradient(); + + protected double fullNetL1; + protected double fullNetL2; public SameDiffLossLayer(NeuralNetConfiguration conf) { @@ -47,7 +52,10 @@ public SameDiffLossLayer(NeuralNetConfiguration conf) { @Override public double computeScore(double fullNetworkL1, double fullNetworkL2, boolean training) { - throw new UnsupportedOperationException("Not yet implemented"); + this.fullNetL1 = fullNetworkL1; + this.fullNetL2 = fullNetworkL2; + computeGradientAndScore(); + return score; } @Override @@ -74,6 +82,7 @@ public void computeGradientAndScore() { } score = out.getDouble(0); + score += fullNetL1 + fullNetL2; } @Override @@ -90,7 +99,9 @@ public Pair backpropGradient(INDArray epsilon) { Pair,List> p = sameDiff.execBackwards(); - return null; + SDVariable inGrad = sameDiff.grad(inVar.getVarName()); + + return new Pair<>(emptyGrad, inGrad.getArr()); } From 578eb4fc3cb161090c9742a9511e014bc9fa52c5 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Fri, 5 Jan 2018 14:50:48 +1100 Subject: [PATCH 24/34] Debugging testS --- .../samediff/TestSameDiffLoss.java | 136 ++++++++++++++++++ .../samediff/testlayers/SameDiffLoss.java | 1 + .../nn/layers/samediff/SameDiffLossLayer.java | 1 + 3 files changed, 138 insertions(+) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java index 12b7b39296ce..0ad8e2042da2 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java @@ -55,6 +55,142 @@ public void testSameDiffLossBasic() { assertTrue(score > 0); } + @Test + public void testReductionsBackwards() { + + for (int i = 1; i < 7; i++) { + + SameDiff sd = SameDiff.create(); + + int nOut = 4; + int minibatch = 3; + SDVariable input = sd.var("in", new int[]{-1, nOut}); + SDVariable label = sd.var("label", new int[]{-1, nOut}); + + SDVariable diff = input.sub(label); + SDVariable sqDiff = diff.mul(diff); + SDVariable msePerEx = sd.mean("msePerEx", sqDiff, 1); + + SDVariable loss; + String name; + switch (i) { + case 0: + loss = sd.mean("loss", msePerEx, 0); + name = "mean"; + break; + case 1: + loss = sd.sum("loss", msePerEx, 0); + name = "sum"; + break; + case 2: + loss = sd.standardDeviation("loss", msePerEx, true, 0); + name = "stdev"; + break; + case 3: + loss = sd.min("loss", msePerEx, 0); + name = "min"; + break; + case 4: + loss = sd.max("loss", msePerEx, 0); + name = "max"; + break; + case 5: + loss = sd.variance("loss", msePerEx, true, 0); + name = "variance"; + break; + case 6: + loss = sd.prod("loss", msePerEx, 0); + name = "prod"; + break; + default: + throw new RuntimeException(); + } + + + String msg = "test: " + i + " - " + name; + log.info("*** Starting test: " + msg); + + INDArray inputArr = Nd4j.rand(minibatch, nOut); + INDArray labelArr = Nd4j.rand(minibatch, nOut); + + sd.associateArrayWithVariable(inputArr, input); + sd.associateArrayWithVariable(labelArr, label); + + INDArray result = sd.execAndEndResult(); + assertEquals(1, result.length()); + + Pair, List> p = sd.execBackwards(); + } + } + + @Test + public void testReductionsBackwards2() { + + for (int i 
= 0; i < 7; i++) { + + SameDiff sd = SameDiff.create(); + + int nOut = 4; + int minibatch = 3; + SDVariable input = sd.var("in", new int[]{-1, nOut}); + SDVariable label = sd.var("label", new int[]{-1, nOut}); + + SDVariable diff = input.sub(label); + SDVariable sqDiff = diff.mul(diff); + SDVariable msePerEx = sd.mean("msePerEx", sqDiff, 1); + + SDVariable loss; + String name; + switch (i) { + case 0: + loss = sd.mean("loss", msePerEx); + name = "mean"; + break; + case 1: + loss = sd.sum("loss", msePerEx); + name = "sum"; + break; + case 2: + loss = sd.standardDeviation("loss", msePerEx, true); + name = "stdev"; + break; + case 3: + loss = sd.min("loss", msePerEx); + name = "min"; + break; + case 4: + loss = sd.max("loss", msePerEx); + name = "max"; + break; + case 5: + loss = sd.variance("loss", msePerEx, true); + name = "variance"; + break; + case 6: + loss = sd.prod("loss", msePerEx); + name = "prod"; + break; + default: + throw new RuntimeException(); + } + + + String msg = "test: " + i + " - " + name; + log.info("*** Starting test: " + msg); + + INDArray inputArr = Nd4j.rand(minibatch, nOut); + INDArray labelArr = Nd4j.rand(minibatch, nOut); + + sd.associateArrayWithVariable(inputArr, input); + sd.associateArrayWithVariable(labelArr, label); + + INDArray result = sd.execAndEndResult(); + assertEquals(1, result.length()); + + Pair, List> p = sd.execBackwards(); + } + } + @Test public void testSameDiffLossVsDl4j() { diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java index 0b57956fd0bd..d28cb46e9211 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java @@ -55,6 +55,7 @@ public void defineLayer(SameDiff sd, SDVariable input, SDVariable label) { SDVariable sqDiff = diff.mul(diff); SDVariable mse = sd.mean(lossKeys.getFirst(), sqDiff, 1); SDVariable score = sd.mean(lossKeys.getSecond(), mse); +// SDVariable score = sd.sum(lossKeys.getSecond(), mse); break; default: throw new UnsupportedOperationException("Not yet implemented: " + lossFn); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java index 22fc37fb7a0d..2d9ff967e828 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java @@ -83,6 +83,7 @@ public void computeGradientAndScore() { score = out.getDouble(0); score += fullNetL1 + fullNetL2; +// score /= input.size(0); } @Override From 196ab9e951123947cd8eaacbed5868d94bc6454d Mon Sep 17 00:00:00 2001 From: Alex Black Date: Mon, 8 Jan 2018 15:41:47 +1100 Subject: [PATCH 25/34] Clean up debug tests --- .../samediff/TestSameDiffConv.java | 198 ------------------ 1 file changed, 198 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java index 5cfa3660590e..a4c265572b91 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java @@ -64,80 +64,6 @@ public 
void testSameDiffConvBasic() { TestUtils.testModelSerialization(net); } - @Test - public void testSameDiffConvForward_Debug() { - - int imgH = 3; - int imgW = 3; - int count = 0; - int minibatch = 1; - boolean hasBias = false; - int nIn = 1; - int nOut = 1; - int[] kernel = {2, 2}; - int[] strides = {1, 1}; - int[] dilation = {2, 1}; - ConvolutionMode cm = ConvolutionMode.Truncate; - Activation a = Activation.TANH; - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() - .layer(new SameDiffConv.Builder() - .nIn(nIn) - .nOut(nOut) - .kernelSize(kernel) - .stride(strides) - .dilation(dilation) - .convolutionMode(cm) - .activation(a) - .hasBias(hasBias) - .build()) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - assertNotNull(net.paramTable()); - - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() - .list() - .layer(new ConvolutionLayer.Builder() - .nIn(nIn) - .nOut(nOut) - .kernelSize(kernel) - .stride(strides) - .dilation(dilation) -// .dilation(new int[]{dilation[1], dilation[0]}) - .convolutionMode(cm) - .activation(a) - .hasBias(hasBias) - .build()) - .build(); - - MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); - net2.init(); - - net.params().assign(net2.params()); - - //Check params: - assertEquals(net2.params(), net.params()); - Map params1 = net.paramTable(); - Map params2 = net2.paramTable(); - assertEquals(params2, params1); - - INDArray in = Nd4j.rand(new int[]{minibatch, nIn, imgH, imgW}); - INDArray out = net.output(in); - INDArray outExp = net2.output(in); - - assertEquals(outExp, out); - - //Also check serialization: - MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net); - INDArray outLoaded = netLoaded.output(in); - - assertEquals(outExp, outLoaded); - } - @Test public void testSameDiffConvForward() { @@ -270,128 +196,4 @@ public void testSameDiffConvForward() { } } } - - - @Test - public void testConv2dEdgeCase(){ - Nd4j.getExecutioner().enableVerboseMode(true); - Nd4j.getExecutioner().enableDebugMode(true); - - INDArray in = Nd4j.create(1,1,1,4).assign(Nd4j.linspace(1,4,4)).muli(10); //NCHW - INDArray wArr = Nd4j.create(4,1,1,1); //dOut, dIt, kH, kW - wArr.get(all(), point(0), point(0), point(0)).assign(Nd4j.linspace(1,4,4)).addi(0.5); - - SameDiff sd = SameDiff.create(); - SDVariable i = sd.var("in", in); - SDVariable w = sd.var("w", wArr); - - Conv2DConfig conf = Conv2DConfig.builder() - .isSameMode(false) - .kw(1) - .kh(1) - .dh(1) - .dw(1) - .sy(1) - .sx(1) - .ph(0) - .pw(0) - .build(); - - SDVariable conv2d = sd.conv2d(new SDVariable[]{i,w}, conf); - - INDArray out = sd.execAndEndResult(); - - - //1x1 conv edge case: equivalent to linear op for each position. 
Also: depth 1 in allows us to use concat + mul here - INDArray wVec = wArr.get( all(), point(0), point(0), point(0)); - INDArray exp = Nd4j.concat(1, in, in, in, in); - Nd4j.getExecutioner().exec(new BroadcastMulOp(exp, wVec, exp, 1)); - - for(int j=0; j<4; j++ ){ - System.out.println(exp.get(point(0), point(j), all(), all()) + "\t" + out.get(point(0), point(j), all(), all())); - } - - assertEquals(exp, out); - } - - - @Test - public void testConv2dEdgeCase2(){ - - INDArray in = Nd4j.create(1,1,1,4).assign(Nd4j.linspace(1,4,4)).muli(10); //NCHW - INDArray wArr = Nd4j.create(3,1,1,1); //dOut, dIt, kH, kW - wArr.get(all(), point(0), point(0), point(0)).assign(Nd4j.linspace(1,3,3)).addi(0.5); - - SameDiff sd = SameDiff.create(); - SDVariable i = sd.var("in", in); - SDVariable w = sd.var("w", wArr); - - Conv2DConfig conf = Conv2DConfig.builder() - .isSameMode(false) - .kw(1) - .kh(1) - .dh(1) - .dw(1) - .sy(1) - .sx(1) - .ph(0) - .pw(0) - .build(); - - SDVariable conv2d = sd.conv2d(new SDVariable[]{i,w}, conf); - - INDArray out = sd.execAndEndResult(); - - - //1x1 conv edge case: equivalent to linear op for each position. Also: depth 1 in allows us to use concat + mul here - INDArray wVec = wArr.get( all(), point(0), point(0), point(0)); - INDArray exp = Nd4j.concat(1, in, in, in); - Nd4j.getExecutioner().exec(new BroadcastMulOp(exp, wVec, exp, 1)); - - for(int j=0; j<3; j++ ){ - System.out.println(exp.get(point(0), point(j), all(), all()) + "\t" + out.get(point(0), point(j), all(), all())); - } - - assertEquals(exp, out); - } - - @Test - public void testConv2dEdgeCase3(){ - - INDArray in = Nd4j.create(1,1,1,3).assign(Nd4j.linspace(1,3,3)).muli(10); //NCHW - INDArray wArr = Nd4j.create(4,1,1,1); //dOut, dIt, kH, kW - wArr.get(all(), point(0), point(0), point(0)).assign(Nd4j.linspace(1,4,4)).addi(0.5); - - SameDiff sd = SameDiff.create(); - SDVariable i = sd.var("in", in); - SDVariable w = sd.var("w", wArr); - - Conv2DConfig conf = Conv2DConfig.builder() - .isSameMode(false) - .kw(1) - .kh(1) - .dh(1) - .dw(1) - .sy(1) - .sx(1) - .ph(0) - .pw(0) - .build(); - - SDVariable conv2d = sd.conv2d(new SDVariable[]{i,w}, conf); - - INDArray out = sd.execAndEndResult(); - - - //1x1 conv edge case: equivalent to linear op for each position. 
Also: depth 1 in allows us to use concat + mul here - INDArray wVec = wArr.get( all(), point(0), point(0), point(0)); - INDArray exp = Nd4j.concat(1, in, in, in, in); - Nd4j.getExecutioner().exec(new BroadcastMulOp(exp, wVec, exp, 1)); - - for(int j=0; j<4; j++ ){ - System.out.println(exp.get(point(0), point(j), all(), all()) + "\t" + out.get(point(0), point(j), all(), all())); - } - - assertEquals(exp, out); - } } From 6fc7581f81925a3ae836134a9a8b561e78b13e9a Mon Sep 17 00:00:00 2001 From: Alex Black Date: Mon, 8 Jan 2018 16:25:03 +1100 Subject: [PATCH 26/34] SameDiff loss layer: implement + test computeScoreForExamples --- .../samediff/TestSameDiffConv.java | 2 +- .../samediff/TestSameDiffLoss.java | 150 ++++++++++-------- .../samediff/testlayers/SameDiffLoss.java | 7 +- .../samediff/BaseSameDiffLossLayer.java | 2 + .../nn/layers/samediff/SameDiffLossLayer.java | 17 +- 5 files changed, 111 insertions(+), 67 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java index a4c265572b91..5f1cb7f3c7c1 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java @@ -76,7 +76,7 @@ public void testSameDiffConvForward() { //With n=1, m=3 this is 1 out of every 3 tests (on average) Random r = new Random(12345); int n = 1; - int m = 3; + int m = 5; for (int minibatch : new int[]{5, 1}) { Activation[] afns = new Activation[]{ diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java index 0ad8e2042da2..75609cc8f3ee 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java @@ -16,6 +16,7 @@ import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.primitives.Pair; @@ -194,6 +195,9 @@ public void testReductionsBackwards2() { @Test public void testSameDiffLossVsDl4j() { + double[] l1s = new double[]{0.0, 0.0, 0.4, 0.4}; + double[] l2s = new double[]{0.0, 0.3, 0.0, 0.3}; + for (int minibatch : new int[]{5, 1}) { int nIn = 3; int nOut = 4; @@ -211,70 +215,88 @@ public void testSameDiffLossVsDl4j() { }; for (int i = 0; i < lossFns.length; i++) { - LossFunctions.LossFunction lf = lossFns[i]; - log.info("Starting test - " + lf); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.TANH).build()) - .layer(new SameDiffLoss.Builder() - .lossFunction(lf) - .build()) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - assertNotNull(net.paramTable()); - - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() - .list() - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.TANH).build()) - .layer(new LossLayer.Builder() - .lossFunction(lf) - .activation(Activation.IDENTITY) - .build()) - .build(); - - MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); - net2.init(); - - net.params().assign(net2.params()); 
- - //Check params: - assertEquals(net2.params(), net.params()); - Map params1 = net.paramTable(); - Map params2 = net2.paramTable(); - assertEquals(params2, params1); - - INDArray in = Nd4j.rand(minibatch, nIn); - INDArray out = net.output(in); - INDArray outExp = net2.output(in); - - assertEquals(outExp, out); - - //Check scores: - INDArray label = Nd4j.rand(minibatch, nOut); - net.setLabels(label); - net2.setLabels(label); - - net.computeGradientAndScore(); - net2.computeGradientAndScore(); - - double scoreExp = net2.score(); - double scoreAct = net.score(); - assertTrue(scoreExp > 0); - assertEquals(scoreExp, scoreAct, 1e-6); - - INDArray gradExp = net2.getFlattenedGradients(); - INDArray gradAct = net.getFlattenedGradients(); - - assertEquals(gradExp, gradAct); - - //Also check serialization: - MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net); - INDArray outLoaded = netLoaded.output(in); - - assertEquals(outExp, outLoaded); + + for( int j=0; j(lossPerExampleVar(), "score"); + } + + @Override + public String lossPerExampleVar(){ + return "lossPerEx"; } @Override diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLossLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLossLayer.java index e5bcada3c439..26c98b135f87 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLossLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLossLayer.java @@ -23,6 +23,8 @@ protected BaseSameDiffLossLayer(){ public abstract void defineLayer(SameDiff sameDiff, SDVariable layerInput, SDVariable label); + public abstract String lossPerExampleVar(); + @Override public Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams) { SameDiffLossLayer ret = new SameDiffLossLayer(conf); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java index 2d9ff967e828..f4dbd00f7c77 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java @@ -1,5 +1,6 @@ package org.deeplearning4j.nn.layers.samediff; +import com.google.common.base.Preconditions; import lombok.Getter; import lombok.Setter; import org.deeplearning4j.nn.api.Layer; @@ -60,7 +61,21 @@ public double computeScore(double fullNetworkL1, double fullNetworkL2, boolean t @Override public INDArray computeScoreForExamples(double fullNetworkL1, double fullNetworkL2) { - throw new UnsupportedOperationException("Not yet implemented"); + Preconditions.checkNotNull(input, "Input cannot be null when calculating score for examples"); + Preconditions.checkNotNull(labels, "Labels cannot be null when calculating score for exapmles"); + + sameDiff.associateArrayWithVariable(input, inVar); + sameDiff.associateArrayWithVariable(labels, labelVar); + + sameDiff.exec(); + + //Next: need to determine name of the "score for each example" component... 
+ String key = ((BaseSameDiffLossLayer)conf().getLayer()).lossPerExampleVar(); + INDArray out = sameDiff.getVariable(key).getArr(); + if(fullNetworkL1 > 0 || fullNetworkL2 > 0){ + out.addi(fullNetworkL1 + fullNetworkL2); + } + return out; } From 94658a68dd344df40e2600e7a13fc4718f171e76 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Mon, 19 Feb 2018 21:12:49 +1100 Subject: [PATCH 27/34] Remove samediff output/loss layers for now --- .../samediff/TestSameDiffLoss.java | 303 ----------- .../samediff/TestSameDiffOutput.java | 241 --------- .../samediff/testlayers/SameDiffLoss.java | 89 ---- .../samediff/testlayers/SameDiffOutput.java | 226 --------- .../samediff/BaseSameDiffLossLayer.java | 43 -- .../samediff/BaseSameDiffOutputLayer.java | 59 --- .../nn/layers/samediff/SameDiffLayer.java | 4 - .../nn/layers/samediff/SameDiffLossLayer.java | 480 ------------------ .../layers/samediff/SameDiffOutputLayer.java | 127 ----- 9 files changed, 1572 deletions(-) delete mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java delete mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffOutput.java delete mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java delete mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffOutput.java delete mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLossLayer.java delete mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffOutputLayer.java delete mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java delete mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java deleted file mode 100644 index 75609cc8f3ee..000000000000 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffLoss.java +++ /dev/null @@ -1,303 +0,0 @@ -package org.deeplearning4j.samediff; - -import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.TestUtils; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.DenseLayer; -import org.deeplearning4j.nn.conf.layers.LossLayer; -import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.nn.params.DefaultParamInitializer; -import org.deeplearning4j.samediff.testlayers.SameDiffLoss; -import org.deeplearning4j.samediff.testlayers.SameDiffOutput; -import org.junit.Test; -import org.nd4j.autodiff.functions.DifferentialFunction; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.dataset.DataSet; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.lossfunctions.LossFunctions; -import org.nd4j.linalg.primitives.Pair; - -import java.util.List; -import java.util.Map; - -import static org.junit.Assert.*; - -@Slf4j -public class TestSameDiffLoss { - - @Test - public void testSameDiffLossBasic() { - - int minibatch = 3; - int nIn = 3; - int nOut = 4; - - MultiLayerConfiguration conf = new 
NeuralNetConfiguration.Builder() - .list() - .layer(new DenseLayer.Builder().nIn(3).nOut(4).activation(Activation.TANH).build()) - .layer(new SameDiffLoss.Builder().lossFunction(LossFunctions.LossFunction.MSE).build()) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - INDArray in = Nd4j.rand(minibatch, nIn); - INDArray out = net.output(in); - assertArrayEquals(new int[]{minibatch, nOut}, out.shape()); - - INDArray label = Nd4j.rand(minibatch, nOut); - net.setLabels(label); - net.computeGradientAndScore(); - double score = net.score(); - assertTrue(score > 0); - } - - @Test - public void testReductionsBackwards() { - - for (int i = 1; i < 7; i++) { - - SameDiff sd = SameDiff.create(); - - int nOut = 4; - int minibatch = 3; - SDVariable input = sd.var("in", new int[]{-1, nOut}); - SDVariable label = sd.var("label", new int[]{-1, nOut}); - - SDVariable diff = input.sub(label); - SDVariable sqDiff = diff.mul(diff); - SDVariable msePerEx = sd.mean("msePerEx", sqDiff, 1); - - SDVariable loss; - String name; - switch (i) { - case 0: - loss = sd.mean("loss", msePerEx, 0); - name = "mean"; - break; - case 1: - loss = sd.sum("loss", msePerEx, 0); - name = "sum"; - break; - case 2: - loss = sd.standardDeviation("loss", msePerEx, true, 0); - name = "stdev"; - break; - case 3: - loss = sd.min("loss", msePerEx, 0); - name = "min"; - break; - case 4: - loss = sd.max("loss", msePerEx, 0); - name = "max"; - break; - case 5: - loss = sd.variance("loss", msePerEx, true, 0); - name = "variance"; - break; - case 6: - loss = sd.prod("loss", msePerEx, 0); - name = "prod"; - break; - default: - throw new RuntimeException(); - } - - - String msg = "test: " + i + " - " + name; - log.info("*** Starting test: " + msg); - - INDArray inputArr = Nd4j.rand(minibatch, nOut); - INDArray labelArr = Nd4j.rand(minibatch, nOut); - - sd.associateArrayWithVariable(inputArr, input); - sd.associateArrayWithVariable(labelArr, label); - - INDArray result = sd.execAndEndResult(); - assertEquals(1, result.length()); - - Pair, List> p = sd.execBackwards(); - } - } - - @Test - public void testReductionsBackwards2() { - - for (int i = 0; i < 7; i++) { - - SameDiff sd = SameDiff.create(); - - int nOut = 4; - int minibatch = 3; - SDVariable input = sd.var("in", new int[]{-1, nOut}); - SDVariable label = sd.var("label", new int[]{-1, nOut}); - - SDVariable diff = input.sub(label); - SDVariable sqDiff = diff.mul(diff); - SDVariable msePerEx = sd.mean("msePerEx", sqDiff, 1); - - SDVariable loss; - String name; - switch (i) { - case 0: - loss = sd.mean("loss", msePerEx); - name = "mean"; - break; - case 1: - loss = sd.sum("loss", msePerEx); - name = "sum"; - break; - case 2: - loss = sd.standardDeviation("loss", msePerEx, true); - name = "stdev"; - break; - case 3: - loss = sd.min("loss", msePerEx); - name = "min"; - break; - case 4: - loss = sd.max("loss", msePerEx); - name = "max"; - break; - case 5: - loss = sd.variance("loss", msePerEx, true); - name = "variance"; - break; - case 6: - loss = sd.prod("loss", msePerEx); - name = "prod"; - break; - default: - throw new RuntimeException(); - } - - - String msg = "test: " + i + " - " + name; - log.info("*** Starting test: " + msg); - - INDArray inputArr = Nd4j.rand(minibatch, nOut); - INDArray labelArr = Nd4j.rand(minibatch, nOut); - - sd.associateArrayWithVariable(inputArr, input); - sd.associateArrayWithVariable(labelArr, label); - - INDArray result = sd.execAndEndResult(); - assertEquals(1, result.length()); - - Pair, List> p = sd.execBackwards(); - } - } 
- - @Test - public void testSameDiffLossVsDl4j() { - - double[] l1s = new double[]{0.0, 0.0, 0.4, 0.4}; - double[] l2s = new double[]{0.0, 0.3, 0.0, 0.3}; - - for (int minibatch : new int[]{5, 1}) { - int nIn = 3; - int nOut = 4; - - LossFunctions.LossFunction[] lossFns = new LossFunctions.LossFunction[]{ - LossFunctions.LossFunction.MSE, -// LossFunctions.LossFunction.MCXENT, -// LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, -// LossFunctions.LossFunction.L2, -// LossFunctions.LossFunction.SQUARED_LOSS, -// LossFunctions.LossFunction.KL_DIVERGENCE, -// LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR, -// LossFunctions.LossFunction.XENT, -// LossFunctions.LossFunction.MEAN_SQUARED_LOGARITHMIC_ERROR - }; - - for (int i = 0; i < lossFns.length; i++) { - - for( int j=0; j [10,9] - SDVariable mean2 = sd.mean(mean1, 1); //[10,9] -> [10,1] - - INDArray inArr = Nd4j.create(10, 9, 8); - sd.associateArrayWithVariable(inArr, in); - - INDArray out = sd.execAndEndResult(); - - assertArrayEquals(msg, new int[]{10,1}, out.shape()); - } - } - - @Test - public void testReductionShapes1() { - - SameDiff sd = SameDiff.create(); - SDVariable in = sd.var("in", new int[]{10, 9, 8}); - SDVariable mean1 = sd.mean(in, 2); //[10,9] out - SDVariable mean2 = sd.mean(mean1, 1); //[10,1] out - sd.execAndEndResult(); //***Exception*** - - INDArray m1 = mean1.getArr(); - INDArray m2 = mean2.getArr(); - - assertArrayEquals(new int[]{10, 9}, m1.shape()); - assertArrayEquals(new int[]{10, 1}, m2.shape()); - } - - @Test - public void testReductionShapes2() { - - SameDiff sd2 = SameDiff.create(); - SDVariable in2 = sd2.var("in", new int[]{10, 9, 8}); - SDVariable meanA = sd2.mean(in2, 0); //[9,8] out - SDVariable meanB = sd2.mean(meanA, 0); //[1,8] out - sd2.execAndEndResult(); //***Exception*** - - INDArray mA = meanA.getArr(); - INDArray mB = meanB.getArr(); - - assertArrayEquals(new int[]{9, 8}, mA.shape()); - assertArrayEquals(new int[]{1, 8}, mB.shape()); - } - - @Test - public void testSameDiffOutputForward() { - - for (int minibatch : new int[]{5, 1}) { - int nIn = 3; - int nOut = 4; - - LossFunctions.LossFunction[] lossFns = new LossFunctions.LossFunction[]{ - LossFunctions.LossFunction.MSE, -// LossFunctions.LossFunction.MCXENT, -// LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, -// LossFunctions.LossFunction.L2, -// LossFunctions.LossFunction.SQUARED_LOSS, -// LossFunctions.LossFunction.KL_DIVERGENCE, -// LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR, -// LossFunctions.LossFunction.XENT, -// LossFunctions.LossFunction.MEAN_SQUARED_LOGARITHMIC_ERROR - }; - - Activation[] afns = new Activation[]{ - Activation.TANH, //MSE -// Activation.SOFTMAX, //MCXENT -// Activation.SOFTMAX, //NLL -// Activation.SOFTPLUS, //L2 -// Activation.TANH, //Squared loss -// Activation.SIGMOID, //KLD -// Activation.TANH, //Squared loss -// Activation.SIGMOID //MSLE - }; - - for (int i = 0; i < lossFns.length; i++) { - LossFunctions.LossFunction lf = lossFns[i]; - Activation a = afns[i]; - log.info("Starting test - " + lf + ", " + a); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() - .layer(new SameDiffOutput.Builder().nIn(nIn).nOut(nOut) - .lossFunction(lf) - .activation(a) - .build()) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - assertNotNull(net.paramTable()); - - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() - .list() - .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) - .build(); - - MultiLayerNetwork net2 = 
new MultiLayerNetwork(conf2); - net2.init(); - - net.params().assign(net2.params()); - - //Check params: - assertEquals(net2.params(), net.params()); - Map params1 = net.paramTable(); - Map params2 = net2.paramTable(); - assertEquals(params2, params1); - - INDArray in = Nd4j.rand(minibatch, nIn); - INDArray out = net.output(in); - INDArray outExp = net2.output(in); - - assertEquals(outExp, out); - - //Also check serialization: - MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net); - INDArray outLoaded = netLoaded.output(in); - - assertEquals(outExp, outLoaded); - } - } - } -} diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java deleted file mode 100644 index 726bba3c0d80..000000000000 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffLoss.java +++ /dev/null @@ -1,89 +0,0 @@ -package org.deeplearning4j.samediff.testlayers; - -import lombok.Data; -import lombok.EqualsAndHashCode; -import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLossLayer; -import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffOutputLayer; -import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils; -import org.deeplearning4j.nn.params.DefaultParamInitializer; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.lossfunctions.LossFunctions; -import org.nd4j.linalg.primitives.Pair; -import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; - -import java.util.*; - -@Data -@EqualsAndHashCode(callSuper = true, exclude = {"paramShapes"}) -@JsonIgnoreProperties("paramShapes") -public class SameDiffLoss extends BaseSameDiffLossLayer { - - private LossFunctions.LossFunction lossFn; - - protected SameDiffLoss(Builder builder) { - super(builder); - this.lossFn = builder.lossFn; - } - - private SameDiffLoss() { - //No op constructor for Jackson - } - - public Pair lossKeys() { - return new Pair<>(lossPerExampleVar(), "score"); - } - - @Override - public String lossPerExampleVar(){ - return "lossPerEx"; - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return null; - } - - @Override - public void defineLayer(SameDiff sd, SDVariable input, SDVariable label) { - - Pair lossKeys = lossKeys(); - SDVariable loss; - - switch (lossFn) { - case MSE: - SDVariable diff = input.sub(label); - SDVariable sqDiff = diff.mul(diff); - SDVariable mse = sd.mean(lossKeys.getFirst(), sqDiff, 1); - SDVariable score = sd.mean(lossKeys.getSecond(), mse); -// SDVariable score = sd.sum(lossKeys.getSecond(), mse); - break; - default: - throw new UnsupportedOperationException("Not yet implemented: " + lossFn); - } - } - - @Override - public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) { - - } - - public static class Builder extends BaseSameDiffLossLayer.Builder { - - private LossFunctions.LossFunction lossFn = LossFunctions.LossFunction.MSE; - - public Builder lossFunction(LossFunctions.LossFunction lossFn) { - this.lossFn = lossFn; - return this; - } - - @Override - public SameDiffLoss build() { - return new SameDiffLoss(this); - } - } -} diff --git 
a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffOutput.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffOutput.java deleted file mode 100644 index 0d6873e43889..000000000000 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffOutput.java +++ /dev/null @@ -1,226 +0,0 @@ -package org.deeplearning4j.samediff.testlayers; - -import lombok.Data; -import lombok.EqualsAndHashCode; -import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; -import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffOutputLayer; -import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils; -import org.deeplearning4j.nn.params.DefaultParamInitializer; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.api.ops.LossFunction; -import org.nd4j.linalg.lossfunctions.LossFunctions; -import org.nd4j.linalg.primitives.Pair; -import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; - -import java.util.*; - -@Data -@EqualsAndHashCode(callSuper = true, exclude = {"paramShapes"}) -@JsonIgnoreProperties("paramShapes") -public class SameDiffOutput extends BaseSameDiffOutputLayer { - - private static final List W_KEYS = Collections.singletonList(DefaultParamInitializer.WEIGHT_KEY); - private static final List B_KEYS = Collections.singletonList(DefaultParamInitializer.BIAS_KEY); - private static final List PARAM_KEYS = Arrays.asList(DefaultParamInitializer.WEIGHT_KEY, DefaultParamInitializer.BIAS_KEY); - - private Map paramShapes; - - private int nIn; - private int nOut; - private Activation activation; - private LossFunctions.LossFunction lossFn; - - protected SameDiffOutput(Builder builder) { - super(builder); - - nIn = builder.nIn; - nOut = builder.nOut; - activation = builder.activation; - lossFn = builder.lossFn; - } - - private SameDiffOutput() { - //No op constructor for Jackson - } - - @Override - public String outputActivationsKey() { - return "out"; - } - - @Override - public Pair lossKeys() { - return new Pair<>("lossPerEx", "score"); - } - - @Override - public int[] labelShape() { - return new int[]{-1, nOut}; - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - return null; - } - - @Override - public void setNIn(InputType inputType, boolean override) { - if (override) { - this.nIn = ((InputType.InputTypeFeedForward) inputType).getSize(); - } - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return null; - } - - @Override - public List weightKeys() { - return W_KEYS; - } - - @Override - public List biasKeys() { - return B_KEYS; - } - - @Override - public Map paramShapes() { - if (paramShapes == null) { - paramShapes = new HashMap<>(); - paramShapes.put(DefaultParamInitializer.WEIGHT_KEY, new int[]{nIn, nOut}); - paramShapes.put(DefaultParamInitializer.BIAS_KEY, new int[]{1, nOut}); - } - return paramShapes; - } - - @Override - public List defineLayer(SameDiff sd, SDVariable layerInput, SDVariable layerLabel, Map paramTable) { - SDVariable weights = paramTable.get(DefaultParamInitializer.WEIGHT_KEY); - SDVariable bias = paramTable.get(DefaultParamInitializer.BIAS_KEY); - - SDVariable mmul = sd.mmul("mmul", layerInput, weights); 
- SDVariable z = mmul.add("z", bias); - SDVariable out = activation.asSameDiff("out", sd, z); - -// //TODO for now: Calculate MSE only -// SDVariable diff = out.sub(layerLabel); -// SDVariable sqDiff = diff.mul(diff); -// SDVariable mse = sd.loss - - int[] labelShape = labelShape(); - Pair lossKeys = lossKeys(); - SDVariable loss; - /* - int d = 1; - switch (lossFn){ - case MSE: - loss = sd.lossMSE( lossKey, out, layerLabel, d); - break; - case L1: - loss = sd.lossL1( lossKey, out, layerLabel, d); - break; - case XENT: - loss = sd.lossBinaryXENT( lossKey, out, layerLabel, d); - break; - case MCXENT: - loss = sd.lossMCXENT( lossKey, out, layerLabel, d); - break; - case SQUARED_LOSS: - loss = sd.lossMSE( lossKey + "-pre", out, layerLabel, d).mul( lossKey, labelShape[1]); - break; - case NEGATIVELOGLIKELIHOOD: - loss = sd.lossNegativeLogLikelihood( lossKey, out, layerLabel, d); - break; - case HINGE: - loss = sd.lossHinge( lossKey, out, layerLabel, d); - break; - case SQUARED_HINGE: - loss = sd.lossSquaredHinge( lossKey, out, layerLabel, d); - break; - case KL_DIVERGENCE: - loss = sd.lossKLD( lossKey, out, layerLabel, d); - break; - case MEAN_ABSOLUTE_ERROR: - loss = sd.lossMAE( lossKey, out, layerLabel, d); - break; - case L2: - loss = sd.lossL2( lossKey, out, layerLabel, d); - break; - case MEAN_SQUARED_LOGARITHMIC_ERROR: - loss = sd.lossMSLE( lossKey, out, layerLabel, d); - break; - case POISSON: - loss = sd.lossPoisson( lossKey, out, layerLabel, d); - break; - case EXPLL: - case RMSE_XENT: - case RECONSTRUCTION_CROSSENTROPY: - case CUSTOM: - case COSINE_PROXIMITY: - case MEAN_ABSOLUTE_PERCENTAGE_ERROR: - default: - throw new UnsupportedOperationException("Unsupported loss function: " + lossFn); - }*/ - - switch (lossFn) { - case MSE: - SDVariable diff = out.sub(layerLabel); - SDVariable sqDiff = diff.mul(diff); - SDVariable mse = sd.mean(lossKeys.getFirst(), sqDiff, 1); - SDVariable score = sd.mean(lossKeys.getSecond(), mse); - break; - default: - throw new UnsupportedOperationException("Not yet implemented: " + lossFn); - } - - - return Collections.singletonList("out"); - } - - @Override - public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) { - if (activation == null) { - activation = SameDiffLayerUtils.fromIActivation(globalConfig.getActivationFn()); - } - } - - public static class Builder extends BaseSameDiffOutputLayer.Builder { - - private int nIn; - private int nOut; - private Activation activation; - private LossFunctions.LossFunction lossFn = LossFunctions.LossFunction.MSE; - - public Builder nIn(int nIn) { - this.nIn = nIn; - return this; - } - - public Builder nOut(int nOut) { - this.nOut = nOut; - return this; - } - - public Builder activation(Activation activation) { - this.activation = activation; - return this; - } - - public Builder lossFunction(LossFunctions.LossFunction lossFn) { - this.lossFn = lossFn; - return this; - } - - @Override - public SameDiffOutput build() { - return new SameDiffOutput(this); - } - } -} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLossLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLossLayer.java deleted file mode 100644 index 26c98b135f87..000000000000 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLossLayer.java +++ /dev/null @@ -1,43 +0,0 @@ -package org.deeplearning4j.nn.conf.layers.samediff; - -import org.deeplearning4j.nn.api.Layer; -import 
org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.layers.samediff.SameDiffLossLayer; -import org.deeplearning4j.optimize.api.IterationListener; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.api.ndarray.INDArray; - -import java.util.Collection; -import java.util.Map; - -public abstract class BaseSameDiffLossLayer extends NoParamSameDiffLayer { - - protected BaseSameDiffLossLayer(Builder builder){ - super(builder); - } - - protected BaseSameDiffLossLayer(){ - //No arg for Jackson/JSON - } - - public abstract void defineLayer(SameDiff sameDiff, SDVariable layerInput, SDVariable label); - - public abstract String lossPerExampleVar(); - - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams) { - SameDiffLossLayer ret = new SameDiffLossLayer(conf); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setConf(conf); - return ret; - } - - public static abstract class Builder> extends NoParamSameDiffLayer.Builder { - - - } -} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffOutputLayer.java deleted file mode 100644 index 768927715a12..000000000000 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffOutputLayer.java +++ /dev/null @@ -1,59 +0,0 @@ -package org.deeplearning4j.nn.conf.layers.samediff; - -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.layers.samediff.SameDiffOutputLayer; -import org.deeplearning4j.optimize.api.IterationListener; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.primitives.Pair; - -import java.util.Collection; -import java.util.List; -import java.util.Map; - -public abstract class BaseSameDiffOutputLayer extends AbstractSameDiffLayer { - - protected BaseSameDiffOutputLayer(Builder builder){ - super(builder); - } - - protected BaseSameDiffOutputLayer(){ - //No arg for JSON/Jackson - } - - public abstract String outputActivationsKey(); - - /** - * Two keys: - * First - For the score *per example* (1 value per example) - * Second - for the average score (1 values for all examples) - * @return - */ - public abstract Pair lossKeys(); - - public abstract int[] labelShape(); - - - public abstract List defineLayer(SameDiff sameDiff, SDVariable layerInput, SDVariable layerLabel, Map paramTable); - - @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams) { - SameDiffOutputLayer ret = new SameDiffOutputLayer(conf); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setConf(conf); - return ret; - } - - public static abstract class Builder> extends AbstractSameDiffLayer.Builder { - - - - - } - -} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java 
b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java index 9ed214799acd..fbae36305f3a 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java @@ -58,10 +58,6 @@ public INDArray activate(boolean training) { doInit(); } - //Build map: -// Map map = new HashMap<>(paramTable()); -// map.put(INPUT_KEY, input); - sameDiff.associateArrayWithVariable(input, sameDiff.getVariable(INPUT_KEY)); try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java deleted file mode 100644 index f4dbd00f7c77..000000000000 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLossLayer.java +++ /dev/null @@ -1,480 +0,0 @@ -package org.deeplearning4j.nn.layers.samediff; - -import com.google.common.base.Preconditions; -import lombok.Getter; -import lombok.Setter; -import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.MaskState; -import org.deeplearning4j.nn.api.layers.IOutputLayer; -import org.deeplearning4j.nn.conf.CacheMode; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLossLayer; -import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffOutputLayer; -import org.deeplearning4j.nn.gradient.DefaultGradient; -import org.deeplearning4j.nn.gradient.Gradient; -import org.deeplearning4j.optimize.api.ConvexOptimizer; -import org.deeplearning4j.optimize.api.IterationListener; -import org.nd4j.autodiff.functions.DifferentialFunction; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.dataset.api.DataSet; -import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; -import org.nd4j.linalg.primitives.Pair; - -import java.util.*; - -public class SameDiffLossLayer implements IOutputLayer { - - public static final String LABEL_KEY = "label"; - - protected NeuralNetConfiguration conf; - @Getter @Setter protected int index; - @Getter @Setter protected INDArray input; - @Getter @Setter private INDArray labels; - protected double score; - - @Getter @Setter protected int iterationCount; - @Getter @Setter protected int epochCount; - - protected SameDiff sameDiff; - protected SDVariable inVar; - protected SDVariable labelVar; - - protected Gradient emptyGrad = new DefaultGradient(); - - protected double fullNetL1; - protected double fullNetL2; - - - public SameDiffLossLayer(NeuralNetConfiguration conf) { - this.conf = conf; - } - - @Override - public double computeScore(double fullNetworkL1, double fullNetworkL2, boolean training) { - this.fullNetL1 = fullNetworkL1; - this.fullNetL2 = fullNetworkL2; - computeGradientAndScore(); - return score; - } - - @Override - public INDArray computeScoreForExamples(double fullNetworkL1, double fullNetworkL2) { - Preconditions.checkNotNull(input, "Input cannot be null when calculating score for examples"); - Preconditions.checkNotNull(labels, "Labels cannot be null when calculating score for exapmles"); - - sameDiff.associateArrayWithVariable(input, inVar); - sameDiff.associateArrayWithVariable(labels, labelVar); - - sameDiff.exec(); - - //Next: need to determine name of 
the "score for each example" component... - String key = ((BaseSameDiffLossLayer)conf().getLayer()).lossPerExampleVar(); - INDArray out = sameDiff.getVariable(key).getArr(); - if(fullNetworkL1 > 0 || fullNetworkL2 > 0){ - out.addi(fullNetworkL1 + fullNetworkL2); - } - return out; - } - - - @Override - public double score() { - return score; - } - - @Override - public void computeGradientAndScore() { - Pair p = backpropGradient(null); - - sameDiff.associateArrayWithVariable(input, inVar); - sameDiff.associateArrayWithVariable(labels, labelVar); - - INDArray out = sameDiff.execAndEndResult(); - if(out.length() != 1){ - throw new IllegalStateException("Expected scalar score: got array with shape " + Arrays.toString(out.shape())); - } - - score = out.getDouble(0); - score += fullNetL1 + fullNetL2; -// score /= input.size(0); - } - - @Override - public Pair backpropGradient(INDArray epsilon) { - if(input == null){ - throw new IllegalStateException("Cannot compute gradient without input (input is null)"); - } - if(labels == null){ - throw new IllegalStateException("Cannot compute gradient without labels (labels are null)"); - } - if(sameDiff == null){ - doInit(); - } - - Pair,List> p = sameDiff.execBackwards(); - - SDVariable inGrad = sameDiff.grad(inVar.getVarName()); - - return new Pair<>(emptyGrad, inGrad.getArr()); - } - - - protected void doInit(){ - SameDiff sd = SameDiff.create(); - SDVariable in = sd.var("input", input); - SDVariable label = sd.var("label", labels); - - BaseSameDiffLossLayer l = (BaseSameDiffLossLayer) conf.getLayer(); - l.defineLayer(sd, in, label); - - this.sameDiff = sd; - this.inVar = in; - this.labelVar = label; - } - - - //-------------------------------------------------------------------------------------------------------- - - @Override - public double f1Score(DataSet data) { - throw new UnsupportedOperationException(); - } - - @Override - public double f1Score(INDArray examples, INDArray labels) { - throw new UnsupportedOperationException(); - } - - @Override - public int numLabels() { - throw new UnsupportedOperationException(); - } - - @Override - public void fit(DataSetIterator iter) { - throw new UnsupportedOperationException(); - } - - @Override - public int[] predict(INDArray examples) { - throw new UnsupportedOperationException(); - } - - @Override - public List predict(DataSet dataSet) { - throw new UnsupportedOperationException(); - } - - @Override - public INDArray labelProbabilities(INDArray examples) { - throw new UnsupportedOperationException(); - } - - @Override - public void fit(INDArray examples, INDArray labels) { - throw new UnsupportedOperationException(); - } - - @Override - public void fit(DataSet data) { - throw new UnsupportedOperationException(); - } - - @Override - public void fit(INDArray examples, int[] labels) { - throw new UnsupportedOperationException(); - } - - - @Override - public void setCacheMode(CacheMode mode) { - //No op - } - - @Override - public double calcL2(boolean backpropOnlyParams) { - return 0; //No params - } - - @Override - public double calcL1(boolean backpropOnlyParams) { - return 0; //No params - } - - @Override - public Type type() { - return Type.FEED_FORWARD; - } - - @Override - public INDArray preOutput(INDArray x) { - return x; - } - - @Override - public INDArray preOutput(INDArray x, TrainingMode training) { - return x; - } - - @Override - public INDArray activate(TrainingMode training) { - return input; - } - - @Override - public INDArray activate(INDArray input, TrainingMode training) { - return input; - 
} - - @Override - public INDArray preOutput(INDArray x, boolean training) { - return x; - } - - @Override - public INDArray activate(boolean training) { - return input; - } - - @Override - public INDArray activate(INDArray input, boolean training) { - return input; - } - - @Override - public INDArray activate() { - return input; - } - - @Override - public INDArray activate(INDArray input) { - return input; - } - - @Override - public Layer transpose() { - throw new UnsupportedOperationException(); - } - - @Override - public Layer clone() { - throw new UnsupportedOperationException(); - } - - @Override - public Collection getListeners() { - return null; - } - - @Override - public void setListeners(IterationListener... listeners) { - //No op - } - - @Override - public void addListeners(IterationListener... listener) { - //No op - } - - @Override - public void fit() { - throw new UnsupportedOperationException("Cannot fit SameDiffLossLayer"); - } - - @Override - public void update(Gradient gradient) { - throw new UnsupportedOperationException("Not supported (no parameters)"); - } - - @Override - public void update(INDArray gradient, String paramType) { - throw new UnsupportedOperationException("Not supported (no parameters)"); - } - - @Override - public void accumulateScore(double accum) { - throw new UnsupportedOperationException("Not supported"); - } - - @Override - public INDArray params() { - return null; - } - - @Override - public int numParams() { - return 0; - } - - @Override - public int numParams(boolean backwards) { - return 0; - } - - @Override - public void setParams(INDArray params) { - throw new UnsupportedOperationException("Not supported (no parameters)"); - } - - @Override - public void setParamsViewArray(INDArray params) { - if(params != null) { - throw new UnsupportedOperationException("Not supported (no parameters)"); - } - } - - @Override - public INDArray getGradientsViewArray() { - throw new UnsupportedOperationException("Not supported (no parameters)"); - } - - @Override - public void setBackpropGradientsViewArray(INDArray gradients) { - throw new UnsupportedOperationException("Not supported (no parameters)"); - } - - @Override - public void fit(INDArray data) { - throw new UnsupportedOperationException("Cannot fit SameDiffLossLayer"); - } - - @Override - public void iterate(INDArray input) { - throw new UnsupportedOperationException("Cannot fit SameDiffLossLayer"); - } - - @Override - public Gradient gradient() { - return null; //No parameters -> no gradient - } - - @Override - public Pair gradientAndScore() { - return new Pair<>(gradient(), score()); - } - - @Override - public int batchSize() { - return 0; - } - - @Override - public NeuralNetConfiguration conf() { - return conf; - } - - @Override - public void setConf(NeuralNetConfiguration conf) { - this.conf = conf; - } - - @Override - public INDArray input() { - return input; - } - - @Override - public void validateInput() { - //No op - } - - @Override - public ConvexOptimizer getOptimizer() { - throw new UnsupportedOperationException("Not supported (no parameters)"); - } - - @Override - public INDArray getParam(String param) { - throw new UnsupportedOperationException("Not supported (no parameters)"); - } - - @Override - public void initParams() { - throw new UnsupportedOperationException("Not supported (no parameters)"); - } - - @Override - public Map paramTable() { - return Collections.emptyMap(); - } - - @Override - public Map paramTable(boolean backpropParamsOnly) { - return paramTable(); - } - - @Override - 
public void setParamTable(Map paramTable) { - if(paramTable != null && paramTable.size() > 0) { - throw new UnsupportedOperationException("Not supported (no parameters)"); - } - } - - @Override - public void setParam(String key, INDArray val) { - throw new UnsupportedOperationException("Not supported (no parameters)"); - } - - @Override - public void clear() { - input = null; - labels = null; - } - - @Override - public void applyConstraints(int iteration, int epoch) { - //No op - } - - @Override - public void init() { - //No op - } - - @Override - public void setListeners(Collection listeners) { - //No op - } - - @Override - public void setInputMiniBatchSize(int size) { - - } - - @Override - public int getInputMiniBatchSize() { - return 0; - } - - @Override - public void setMaskArray(INDArray maskArray) { - if(maskArray != null) { - throw new UnsupportedOperationException("Mask arrays: not yet supported for SameDiffLossLayer"); - } - } - - @Override - public INDArray getMaskArray() { - return null; - } - - @Override - public boolean isPretrainLayer() { - return false; - } - - @Override - public void clearNoiseWeightParams() { - //No op - } - - @Override - public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { - if(maskArray != null){ - throw new UnsupportedOperationException("Mask arrays: not yet supported for SameDiffLossLayer"); - } - return null; - } -} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java deleted file mode 100644 index df8414163097..000000000000 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java +++ /dev/null @@ -1,127 +0,0 @@ -package org.deeplearning4j.nn.layers.samediff; - -import org.deeplearning4j.nn.api.layers.IOutputLayer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffOutputLayer; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.dataset.api.DataSet; -import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; - -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; - -public class SameDiffOutputLayer extends SameDiffLayer implements IOutputLayer { - - public static final String LABEL_KEY = "label"; - - private INDArray labels; - - - - public SameDiffOutputLayer(NeuralNetConfiguration conf) { - super(conf); - } - - @Override - public void setLabels(INDArray labels) { - this.labels = labels; - } - - @Override - public INDArray getLabels() { - return labels; - } - - @Override - public double computeScore(double fullNetworkL1, double fullNetworkL2, boolean training) { - throw new UnsupportedOperationException("Not yet implemented"); - } - - @Override - public INDArray computeScoreForExamples(double fullNetworkL1, double fullNetworkL2) { - throw new UnsupportedOperationException("Not yet implemented"); - } - - @Override - public double f1Score(DataSet data) { - throw new UnsupportedOperationException(); - } - - @Override - public double f1Score(INDArray examples, INDArray labels) { - throw new UnsupportedOperationException(); - } - - @Override - public int numLabels() { - throw new UnsupportedOperationException(); - } - - @Override - public void fit(DataSetIterator iter) { - throw new 
UnsupportedOperationException(); - } - - @Override - public int[] predict(INDArray examples) { - throw new UnsupportedOperationException(); - } - - @Override - public List predict(DataSet dataSet) { - throw new UnsupportedOperationException(); - } - - @Override - public INDArray labelProbabilities(INDArray examples) { - throw new UnsupportedOperationException(); - } - - @Override - public void fit(INDArray examples, INDArray labels) { - throw new UnsupportedOperationException(); - } - - @Override - public void fit(DataSet data) { - throw new UnsupportedOperationException(); - } - - @Override - public void fit(INDArray examples, int[] labels) { - throw new UnsupportedOperationException(); - } - - protected void doInit(){ - BaseSameDiffOutputLayer ol = ((BaseSameDiffOutputLayer)layerConf()); - - sameDiff = SameDiff.create(); - Map p = paramTable(); - - int[] inputShape = input.shape().clone(); - int[] labelShape = ol.labelShape(); -// inputShape[0] = -1; //TODO THIS DOESN'T ENABLE VARIABLE SIZE MINIBATCHES - SDVariable inputVar = sameDiff.var(INPUT_KEY, inputShape); - SDVariable labelVar = sameDiff.var(LABEL_KEY, labelShape); - Map paramShapes = layerConf().paramShapes(); - Map params = new LinkedHashMap<>(); - for(String s : layerConf().paramKeys()){ - int[] ps = paramShapes.get(s); - SDVariable v = sameDiff.var(s, ps); - params.put(s, v); - } - List outputKeys = ol.defineLayer(sameDiff, inputVar, labelVar, params); - if(outputKeys == null || outputKeys.size() != 1){ - throw new IllegalStateException("Invalid output keys: " + outputKeys); - } - - for(Map.Entry e : p.entrySet()){ - sameDiff.associateArrayWithVariable(e.getValue(), sameDiff.getVariable(e.getKey())); - } - - this.outputKeys = outputKeys; - } -} From d869d8d28bfd8d1dfab19c64583bf1b094c8721f Mon Sep 17 00:00:00 2001 From: Alex Black Date: Mon, 19 Feb 2018 22:15:26 +1100 Subject: [PATCH 28/34] Add parameter initialization for samediff layers --- .../samediff/TestSameDiffConv.java | 10 +++++-- .../samediff/TestSameDiffDense.java | 16 +++++++--- .../samediff/testlayers/SameDiffConv.java | 15 ++++++++++ .../samediff/testlayers/SameDiffDense.java | 15 +++++++++- .../samediff/AbstractSameDiffLayer.java | 3 +- .../layers/samediff/BaseSameDiffLayer.java | 9 ++++++ .../nn/layers/samediff/SameDiffLayer.java | 30 ++++++++++++++----- .../nn/multilayer/MultiLayerNetwork.java | 4 ++- .../nn/params/SameDiffParamInitializer.java | 3 +- 9 files changed, 86 insertions(+), 19 deletions(-) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java index 5f1cb7f3c7c1..8b7834f2e3d6 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java @@ -12,6 +12,7 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; +import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.samediff.testlayers.SameDiffConv; import org.deeplearning4j.samediff.testlayers.SameDiffDense; import org.junit.Test; @@ -86,7 +87,7 @@ public void testSameDiffConvForward() { Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, -// Activation.CUBE, //https://github.com/deeplearning4j/nd4j/issues/2426 + Activation.CUBE, Activation.HARDTANH, Activation.RELU }; @@ -112,8 +113,10 @@ public void 
testSameDiffConvForward() { log.info("Starting test: " + msg); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .seed(12345) .list() .layer(new SameDiffConv.Builder() + .weightInit(WeightInit.XAVIER) .nIn(nIn) .nOut(nOut) .kernelSize(kernel) @@ -124,6 +127,7 @@ public void testSameDiffConvForward() { .hasBias(hasBias) .build()) .layer(new SameDiffConv.Builder() + .weightInit(WeightInit.XAVIER) .nIn(nOut) .nOut(nOut) .kernelSize(kernel) @@ -141,6 +145,8 @@ public void testSameDiffConvForward() { assertNotNull(net.paramTable()); MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + .weightInit(WeightInit.XAVIER) + .seed(12345) .list() .layer(new ConvolutionLayer.Builder() .nIn(nIn) @@ -167,7 +173,7 @@ public void testSameDiffConvForward() { MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); - net.params().assign(net2.params()); + assertEquals(net2.params(), net.params()); //Check params: assertEquals(msg, net2.params(), net.params()); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffDense.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffDense.java index 4712155574a2..36fc9dde9b80 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffDense.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffDense.java @@ -9,6 +9,7 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.params.DefaultParamInitializer; +import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.samediff.testlayers.SameDiffDense; import org.junit.Test; import org.nd4j.autodiff.samediff.SDVariable; @@ -132,20 +133,24 @@ public void testSameDiffDenseForwardMultiLayer() { Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, -// Activation.CUBE, //https://github.com/deeplearning4j/nd4j/issues/2426 + Activation.CUBE, //https://github.com/deeplearning4j/nd4j/issues/2426 Activation.HARDTANH, -// Activation.RELU //JVM crash + Activation.RELU //JVM crash }; for (Activation a : afns) { log.info("Starting test - " + a); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .seed(12345) .list() .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) + .weightInit(WeightInit.XAVIER) .activation(a).build()) .layer(new SameDiffDense.Builder().nIn(nOut).nOut(nOut) + .weightInit(WeightInit.XAVIER) .activation(a).build()) .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut) + .weightInit(WeightInit.XAVIER) .activation(a).build()) .build(); @@ -155,6 +160,8 @@ public void testSameDiffDenseForwardMultiLayer() { assertNotNull(net.paramTable()); MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + .seed(12345) + .weightInit(WeightInit.XAVIER) .list() .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) .layer(new DenseLayer.Builder().activation(a).nIn(nOut).nOut(nOut).build()) @@ -165,7 +172,8 @@ public void testSameDiffDenseForwardMultiLayer() { MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); - net.params().assign(net2.params()); +// net.params().assign(net2.params()); + assertEquals(net2.params(), net.params()); //Check params: assertEquals(net2.params(), net.params()); @@ -188,7 +196,7 @@ public void testSameDiffDenseForwardMultiLayer() { } } - @Test + @Test(expected = UnsupportedOperationException.class) //Backprop not yet supported public void testSameDiffDenseBackward() { int 
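// Note on the forward-pass tests above: the reference-network parameters are no longer copied in
// via assign(). Because both configurations use seed 12345 and WeightInit.XAVIER, and the
// SameDiffDense test layer initializes its weights in 'f' order to match DL4J's DenseLayer layout,
// the two networks are expected to initialize to identical parameters, which is what
// assertEquals(net2.params(), net.params()) now verifies directly.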
nIn = 3; diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java index a92eee48c39b..1dcda2499154 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java @@ -11,10 +11,12 @@ import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; +import org.deeplearning4j.nn.weights.WeightInitUtil; import org.deeplearning4j.util.ConvolutionUtils; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv2DConfig; import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; @@ -123,6 +125,19 @@ public Map paramShapes() { return paramShapes; } + @Override + public void initializeParams(Map params) { + for(Map.Entry e : params.entrySet()){ + if(ConvolutionParamInitializer.BIAS_KEY.equals(e.getKey())){ + e.getValue().assign(0); + } else { + double fanIn = nIn * kernel[0] * kernel[1]; + double fanOut = nOut * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]); + WeightInitUtil.initWeights(fanIn, fanOut, e.getValue().shape(), weightInit, null, 'c', e.getValue()); + } + } + } + @Override public List defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable) { diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java index 96f345d10b33..040b19cae68b 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java @@ -10,10 +10,12 @@ import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils; import org.deeplearning4j.nn.params.DefaultParamInitializer; +import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; +import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; import java.util.*; @@ -82,6 +84,18 @@ public Map paramShapes() { return paramShapes; } + @Override + public void initializeParams(Map params){ + for(Map.Entry e : params.entrySet()){ + if(DefaultParamInitializer.BIAS_KEY.equals(e.getKey())){ + e.getValue().assign(0.0); + } else { + //Normally use 'c' order, but use 'f' for direct comparison to DL4J DenseLayer + WeightInitUtil.initWeights(nIn, nOut, new int[]{nIn, nOut}, weightInit, null, 'f', e.getValue()); + } + } + } + @Override public List defineLayer(SameDiff sd, SDVariable layerInput, Map paramTable) { SDVariable weights = paramTable.get(DefaultParamInitializer.WEIGHT_KEY); @@ -89,7 +103,6 @@ public List defineLayer(SameDiff sd, SDVariable layerInput, Map paramShapes(); + public abstract void initializeParams(Map params); + public abstract void 
applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig); @Override @@ -208,6 +210,5 @@ public T biasUpdater(IUpdater biasUpdater){ this.biasUpdater = biasUpdater; return (T) this; } - } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java index 8104d2598075..db22189115da 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java @@ -10,6 +10,7 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.layers.samediff.SameDiffLayer; import org.deeplearning4j.nn.params.SameDiffParamInitializer; +import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.api.IterationListener; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; @@ -25,9 +26,11 @@ @EqualsAndHashCode(callSuper = true) public abstract class BaseSameDiffLayer extends AbstractSameDiffLayer { + protected WeightInit weightInit; protected BaseSameDiffLayer(Builder builder){ super(builder); + this.weightInit = builder.weightInit; } protected BaseSameDiffLayer(){ @@ -50,9 +53,15 @@ public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, return ret; } + @SuppressWarnings("unchecked") public static abstract class Builder> extends AbstractSameDiffLayer.Builder { + protected WeightInit weightInit = WeightInit.XAVIER; + public T weightInit(WeightInit weightInit){ + this.weightInit = weightInit; + return (T)this; + } } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java index fbae36305f3a..adcbc4aca68b 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java @@ -14,10 +14,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.primitives.Pair; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; +import java.util.*; public class SameDiffLayer extends AbstractLayer { @@ -74,6 +71,9 @@ public INDArray preOutput(boolean training) { @Override public Pair backpropGradient(INDArray epsilon) { + throw new UnsupportedOperationException("Fitting DL4J SameDiff layers via backpropagation is not yet supported"); + + /* Gradient g = new DefaultGradient(); INDArray dLdIn; @@ -88,6 +88,7 @@ public Pair backpropGradient(INDArray epsilon) { } return new Pair<>(g, dLdIn); + */ } @Override @@ -133,7 +134,14 @@ public INDArray getParam(String param) { @Override public void setParam(String key, INDArray val) { - throw new UnsupportedOperationException("Not supported"); + if(!paramTable.containsKey(key)){ + throw new IllegalArgumentException("Cannot set parameter, invalid/unknown parameter key: " + key); + } + INDArray current = paramTable.get(key); + if(!Arrays.equals(current.shape(), val.shape())){ + throw new IllegalArgumentException("Cannot set parameter \"" + key + "\", invalid shape: parameter array has shape " + + Arrays.toString(current.shape()) + ", trying to set parameter of shape " + Arrays.toString(val.shape())); + } } @Override @@ -144,7 +152,7 @@ public void 
setParams(INDArray params) { } protected void setParams(INDArray params, char order) { - throw new UnsupportedOperationException("Not supported"); + setParams(params); } @Override @@ -154,7 +162,7 @@ public void setParamsViewArray(INDArray params) { @Override public INDArray getGradientsViewArray() { - return params; + return gradients; } @Override @@ -164,7 +172,13 @@ public void setBackpropGradientsViewArray(INDArray gradients) { @Override public void setParamTable(Map paramTable) { - this.paramTable = paramTable; + if(this.paramTable == null){ + this.paramTable = paramTable; + } else { + for (Map.Entry e : paramTable.entrySet()) { + setParam(e.getKey(), e.getValue()); + } + } } @Override diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java index d3b2e2f38ac1..899c513b0c65 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java @@ -1492,7 +1492,9 @@ protected Pair calcBackpropGradients(INDArray epsilon, boole } //log.info("This layer space: {}", ((Nd4jWorkspace) ws).getThisCycleAllocations()); - } catch (Exception e) { + } catch (RuntimeException e) { + throw e; + } catch(Exception e) { throw new RuntimeException(e); } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java index bfa7c686539b..0dcbe3e64107 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java @@ -74,8 +74,7 @@ public Map init(NeuralNetConfiguration conf, INDArray paramsVi AbstractSameDiffLayer sd = (AbstractSameDiffLayer) conf.getLayer(); Map out = subsetAndReshape(sd.paramKeys(), sd.paramShapes(), paramsView, sd); if(initializeParams){ - //TODO - log.warn("***** SameDiffParamInitializer: Parameter initialization not yet implemented *****"); + sd.initializeParams(out); } for(String s : sd.paramKeys()){ From 62d267f6463da5feb92a158726794d80dd2fd314 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Tue, 20 Feb 2018 12:37:23 +1100 Subject: [PATCH 29/34] Refactoring, reduce number of methods to override --- .../layers}/samediff/TestSameDiffConv.java | 17 +--- .../layers}/samediff/TestSameDiffDense.java | 9 +-- .../samediff/testlayers/SameDiffConv.java | 74 +++++++---------- .../samediff/testlayers/SameDiffDense.java | 29 ++----- .../samediff/AbstractSameDiffLayer.java | 31 +++----- .../conf/layers/samediff/SDLayerParams.java | 25 ++++++ .../samediff/impl/DefaultSDLayerParams.java | 79 +++++++++++++++++++ .../nn/params/SameDiffParamInitializer.java | 18 +++-- 8 files changed, 167 insertions(+), 115 deletions(-) rename deeplearning4j-core/src/test/java/org/deeplearning4j/{ => nn/layers}/samediff/TestSameDiffConv.java (92%) rename deeplearning4j-core/src/test/java/org/deeplearning4j/{ => nn/layers}/samediff/TestSameDiffDense.java (97%) rename deeplearning4j-core/src/test/java/org/deeplearning4j/{ => nn/layers}/samediff/testlayers/SameDiffConv.java (83%) rename deeplearning4j-core/src/test/java/org/deeplearning4j/{ => nn/layers}/samediff/testlayers/SameDiffDense.java (84%) create mode 100644 
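// How the flattened parameter view is split before the new sd.initializeParams(out) call: each
// parameter key takes the next prod(shape) elements of the row-vector view and is reshaped to its
// declared shape using the layer's paramReshapeOrder, so each per-parameter array can be
// initialized in place. The subsetAndReshape body is not shown in this patch; the method below
// (relying on org.nd4j.linalg.indexing.NDArrayIndex and org.nd4j.linalg.util.ArrayUtil) is an
// assumed equivalent for illustration only.
private static Map<String, INDArray> subsetAndReshapeSketch(List<String> paramKeys, Map<String, int[]> shapes,
                                                            INDArray view, AbstractSameDiffLayer layer) {
    Map<String, INDArray> out = new LinkedHashMap<>();
    int offset = 0;
    for (String key : paramKeys) {
        int[] shape = shapes.get(key);
        int length = ArrayUtil.prod(shape);
        INDArray subset = view.get(NDArrayIndex.point(0), NDArrayIndex.interval(offset, offset + length));
        out.put(key, subset.reshape(layer.paramReshapeOrder(key), shape));
        offset += length;
    }
    return out;
}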
deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java create mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/impl/DefaultSDLayerParams.java diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java similarity index 92% rename from deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java rename to deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java index 8b7834f2e3d6..2e2ff81690c4 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java @@ -1,38 +1,25 @@ -package org.deeplearning4j.samediff; +package org.deeplearning4j.nn.layers.samediff; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; -import org.deeplearning4j.nn.conf.layers.DenseLayer; -import org.deeplearning4j.nn.conf.layers.OutputLayer; -import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.layers.samediff.testlayers.SameDiffConv; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.samediff.testlayers.SameDiffConv; -import org.deeplearning4j.samediff.testlayers.SameDiffDense; import org.junit.Test; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp; -import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv2DConfig; -import org.nd4j.linalg.convolution.Convolution; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.Arrays; import java.util.Map; import java.util.Random; import static org.junit.Assert.*; -import static org.nd4j.linalg.indexing.NDArrayIndex.all; -import static org.nd4j.linalg.indexing.NDArrayIndex.point; @Slf4j public class TestSameDiffConv { diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffDense.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java similarity index 97% rename from deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffDense.java rename to deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java index 36fc9dde9b80..2d1dd9bf34ee 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/TestSameDiffDense.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java @@ -1,4 +1,4 @@ -package org.deeplearning4j.samediff; +package org.deeplearning4j.nn.layers.samediff; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.TestUtils; @@ -7,21 +7,16 @@ import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import 
org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.layers.samediff.testlayers.SameDiffDense; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.samediff.testlayers.SameDiffDense; import org.junit.Test; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.api.blas.params.MMulTranspose; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.LossFunctions; -import org.nd4j.linalg.ops.transforms.Transforms; -import java.util.HashMap; import java.util.Map; import static org.junit.Assert.assertArrayEquals; diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java similarity index 83% rename from deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java rename to deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java index 1dcda2499154..29d0ede36a06 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java @@ -1,4 +1,4 @@ -package org.deeplearning4j.samediff.testlayers; +package org.deeplearning4j.nn.layers.samediff.testlayers; import lombok.Data; import lombok.EqualsAndHashCode; @@ -6,13 +6,12 @@ import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.InputTypeUtil; import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; +import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; import org.deeplearning4j.nn.weights.WeightInitUtil; -import org.deeplearning4j.util.ConvolutionUtils; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; @@ -42,8 +41,6 @@ public class SameDiffConv extends BaseSameDiffLayer { private int[] dilation; private boolean hasBias; - private Map paramShapes; - protected SameDiffConv(Builder b) { super(b); this.nIn = b.nIn; @@ -82,51 +79,38 @@ public InputPreProcessor getPreProcessorForInputType(InputType inputType) { } @Override - public List weightKeys() { - return WEIGHT_KEYS; + public void defineParameters(SDLayerParams params) { + params.clear(); + int[] weightsShape = new int[]{nOut, nIn, kernel[0], kernel[1]}; + params.addWeightParam(ConvolutionParamInitializer.WEIGHT_KEY, weightsShape); + int[] biasShape = new int[]{1, nOut}; + params.addBiasParam(ConvolutionParamInitializer.BIAS_KEY, biasShape); } - @Override - public List biasKeys() { - if(hasBias) { - return BIAS_KEYS; - } else { - return Collections.emptyList(); - } - } +// @Override +// public char paramReshapeOrder(String param) { +// //To match DL4J +// return 'c'; +// } + +// @Override +// public Map paramShapes() { +// if (paramShapes == null) { +// int[] 
weightsShape = new int[]{nOut, nIn, kernel[0], kernel[1]}; +// Map m = new HashMap<>(); +// m.put(ConvolutionParamInitializer.WEIGHT_KEY, weightsShape); +// if(hasBias) { +// int[] biasShape = new int[]{1, nOut}; +// m.put(ConvolutionParamInitializer.BIAS_KEY, biasShape); +// } +// paramShapes = m; +// } +// return paramShapes; +// } - @Override - public List paramKeys() { - if(hasBias) { - return PARAM_KEYS; - } else { - return WEIGHT_KEYS; - } - } - - @Override - public char paramReshapeOrder(String param) { - //To match DL4J - return 'c'; - } - - @Override - public Map paramShapes() { - if (paramShapes == null) { - int[] weightsShape = new int[]{nOut, nIn, kernel[0], kernel[1]}; - Map m = new HashMap<>(); - m.put(ConvolutionParamInitializer.WEIGHT_KEY, weightsShape); - if(hasBias) { - int[] biasShape = new int[]{1, nOut}; - m.put(ConvolutionParamInitializer.BIAS_KEY, biasShape); - } - paramShapes = m; - } - return paramShapes; - } @Override - public void initializeParams(Map params) { + public void initializeParameters(Map params) { for(Map.Entry e : params.entrySet()){ if(ConvolutionParamInitializer.BIAS_KEY.equals(e.getKey())){ e.getValue().assign(0); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java similarity index 84% rename from deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java rename to deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java index 040b19cae68b..84f0697a2563 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/samediff/testlayers/SameDiffDense.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java @@ -1,20 +1,18 @@ -package org.deeplearning4j.samediff.testlayers; +package org.deeplearning4j.nn.layers.samediff.testlayers; import lombok.Data; import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.Layer; import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; +import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; @@ -65,27 +63,14 @@ public InputPreProcessor getPreProcessorForInputType(InputType inputType) { } @Override - public List weightKeys() { - return W_KEYS; + public void defineParameters(SDLayerParams params) { + params.clear(); + params.addWeightParam(DefaultParamInitializer.WEIGHT_KEY, new int[]{nIn, nOut}); + params.addBiasParam(DefaultParamInitializer.BIAS_KEY, new int[]{1, nOut}); } @Override - public List biasKeys() { - return B_KEYS; - } - - @Override - public Map paramShapes() { - if(paramShapes == null){ - paramShapes = new HashMap<>(); - paramShapes.put(DefaultParamInitializer.WEIGHT_KEY, new int[]{nIn, nOut}); - 
paramShapes.put(DefaultParamInitializer.BIAS_KEY, new int[]{1, nOut}); - } - return paramShapes; - } - - @Override - public void initializeParams(Map params){ + public void initializeParameters(Map params){ for(Map.Entry e : params.entrySet()){ if(DefaultParamInitializer.BIAS_KEY.equals(e.getKey())){ e.getValue().assign(0.0); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java index c123f61ded74..a47f5640ea73 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java @@ -7,6 +7,7 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.samediff.impl.DefaultSDLayerParams; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.layers.samediff.SameDiffLayer; import org.deeplearning4j.nn.params.SameDiffParamInitializer; @@ -32,8 +33,7 @@ public abstract class AbstractSameDiffLayer extends Layer { protected IUpdater updater; protected IUpdater biasUpdater; - - private List paramKeys; + private SDLayerParams layerParams; protected AbstractSameDiffLayer(Builder builder){ super(builder); @@ -43,6 +43,9 @@ protected AbstractSameDiffLayer(Builder builder){ this.l2Bias = builder.l2Bias; this.updater = builder.updater; this.biasUpdater = builder.biasUpdater; + + layerParams = new DefaultSDLayerParams(); + defineParameters(layerParams); } protected AbstractSameDiffLayer(){ @@ -58,13 +61,15 @@ protected AbstractSameDiffLayer(){ @Override public abstract InputPreProcessor getPreProcessorForInputType(InputType inputType); - public abstract List weightKeys(); - - public abstract List biasKeys(); +// public abstract List weightKeys(); +// +// public abstract List biasKeys(); +// +// public abstract Map paramShapes(); - public abstract Map paramShapes(); + public abstract void defineParameters(SDLayerParams params); - public abstract void initializeParams(Map params); + public abstract void initializeParameters(Map params); public abstract void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig); @@ -109,18 +114,8 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { return new LayerMemoryReport(); //TODO } - public List paramKeys(){ - if(paramKeys == null){ - List pk = new ArrayList<>(); - pk.addAll(weightKeys()); - pk.addAll(biasKeys()); - paramKeys = pk; - } - return paramKeys; - } - public char paramReshapeOrder(String param){ - return 'f'; + return 'c'; } public void applyGlobalConfig(NeuralNetConfiguration.Builder b){ diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java new file mode 100644 index 000000000000..e00ae60a51a8 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java @@ -0,0 +1,25 @@ +package org.deeplearning4j.nn.conf.layers.samediff; + +import org.nd4j.shade.jackson.annotation.JsonSubTypes; +import org.nd4j.shade.jackson.annotation.JsonTypeInfo; + +import java.util.List; +import java.util.Map; + +@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = 
JsonTypeInfo.As.PROPERTY, property = "@class") +public interface SDLayerParams { + + void addWeightParam(String paramKey, int[] paramShape); + + void addBiasParam(String paramKey, int[] paramShape); + + List getParameterKeys(); + + List getWeightParameterKeys(); + + List getBiasParameterKeys(); + + Map getParamShapes(); + + void clear(); +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/impl/DefaultSDLayerParams.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/impl/DefaultSDLayerParams.java new file mode 100644 index 000000000000..a4bfdacad3be --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/impl/DefaultSDLayerParams.java @@ -0,0 +1,79 @@ +package org.deeplearning4j.nn.conf.layers.samediff.impl; + +import lombok.EqualsAndHashCode; +import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams; +import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; + +import java.util.*; + +@JsonIgnoreProperties({"paramsList", "weightParamsList", "biasParamsList"}) +@EqualsAndHashCode(exclude = {"paramsList", "weightParamsList", "biasParamsList"}) +public class DefaultSDLayerParams implements SDLayerParams { + + private Map weightParams = new LinkedHashMap<>(); + private Map biasParams = new LinkedHashMap<>(); + + private List paramsList; + private List weightParamsList; + private List biasParamsList; + + @Override + public void addWeightParam(String paramKey, int[] paramShape) { + weightParams.put(paramKey, paramShape); + paramsList = null; + weightParams = null; + biasParams = null; + } + + @Override + public void addBiasParam(String paramKey, int[] paramShape) { + biasParams.put(paramKey, paramShape); + paramsList = null; + weightParams = null; + biasParams = null; + } + + @Override + public List getParameterKeys() { + if(paramsList == null) { + List out = new ArrayList<>(); + out.addAll(getWeightParameterKeys()); + out.addAll(getBiasParameterKeys()); + this.paramsList = Collections.unmodifiableList(out); + } + return paramsList; + } + + @Override + public List getWeightParameterKeys() { + if(weightParamsList == null){ + weightParamsList = Collections.unmodifiableList(new ArrayList<>(weightParams.keySet())); + } + return weightParamsList; + } + + @Override + public List getBiasParameterKeys() { + if(biasParamsList == null){ + biasParamsList = Collections.unmodifiableList(new ArrayList<>(biasParams.keySet())); + } + return biasParamsList; + } + + @Override + public Map getParamShapes() { + Map map = new LinkedHashMap<>(); + map.putAll(weightParams); + map.putAll(biasParams); + return map; + } + + @Override + public void clear() { + weightParams.clear(); + biasParams.clear(); + paramsList = null; + weightParamsList = null; + biasParamsList = null; + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java index 0dcbe3e64107..dc1531733953 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java @@ -33,7 +33,7 @@ public int numParams(NeuralNetConfiguration conf) { @Override public int numParams(Layer layer) { AbstractSameDiffLayer sd = (AbstractSameDiffLayer)layer; - Map m = sd.paramShapes(); + Map m = sd.getLayerParams().getParamShapes(); int n = 0; for(int[] arr : m.values()){ n += 
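// The parameter count here is the sum of element counts (ArrayUtil.prod) over every declared
// parameter shape, i.e. the number of slots the flattened parameter view must provide for this layer.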
ArrayUtil.prod(arr); @@ -44,19 +44,19 @@ public int numParams(Layer layer) { @Override public List paramKeys(Layer layer) { AbstractSameDiffLayer sd = (AbstractSameDiffLayer)layer; - return sd.paramKeys(); + return sd.getLayerParams().getParameterKeys(); } @Override public List weightKeys(Layer layer) { AbstractSameDiffLayer sd = (AbstractSameDiffLayer)layer; - return sd.weightKeys(); + return sd.getLayerParams().getWeightParameterKeys(); } @Override public List biasKeys(Layer layer) { AbstractSameDiffLayer sd = (AbstractSameDiffLayer)layer; - return sd.biasKeys(); + return sd.getLayerParams().getBiasParameterKeys(); } @Override @@ -72,12 +72,13 @@ public boolean isBiasParam(Layer layer, String key) { @Override public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { AbstractSameDiffLayer sd = (AbstractSameDiffLayer) conf.getLayer(); - Map out = subsetAndReshape(sd.paramKeys(), sd.paramShapes(), paramsView, sd); + Map out = subsetAndReshape(sd.getLayerParams().getParameterKeys(), + sd.getLayerParams().getParamShapes(), paramsView, sd); if(initializeParams){ - sd.initializeParams(out); + sd.initializeParameters(out); } - for(String s : sd.paramKeys()){ + for(String s : sd.getLayerParams().getParameterKeys()){ conf.addVariable(s); } @@ -87,7 +88,8 @@ public Map init(NeuralNetConfiguration conf, INDArray paramsVi @Override public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { AbstractSameDiffLayer sd = (AbstractSameDiffLayer) conf.getLayer(); - return subsetAndReshape(sd.paramKeys(), sd.paramShapes(), gradientView, sd); + return subsetAndReshape(sd.getLayerParams().getParameterKeys(), sd.getLayerParams().getParamShapes(), + gradientView, sd); } private Map subsetAndReshape(List params, Map paramShapes, INDArray view, From f84cb574cd2c056c323fc987555ee2ac954069f3 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Tue, 20 Feb 2018 14:22:13 +1100 Subject: [PATCH 30/34] More cleanup/fixes --- .../nn/layers/samediff/TestSameDiffConv.java | 3 - .../nn/layers/samediff/TestSameDiffDense.java | 4 +- .../samediff/testlayers/SameDiffConv.java | 28 +---- .../samediff/testlayers/SameDiffDense.java | 5 + .../nn/conf/NeuralNetConfiguration.java | 7 ++ .../samediff/AbstractSameDiffLayer.java | 23 ++-- .../conf/layers/samediff/SDLayerParams.java | 108 ++++++++++++++++-- .../samediff/impl/DefaultSDLayerParams.java | 79 ------------- .../nn/layers/samediff/SameDiffLayer.java | 4 +- 9 files changed, 125 insertions(+), 136 deletions(-) delete mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/impl/DefaultSDLayerParams.java diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java index 2e2ff81690c4..e82509e8c90f 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java @@ -160,10 +160,7 @@ public void testSameDiffConvForward() { MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); - assertEquals(net2.params(), net.params()); - //Check params: - assertEquals(msg, net2.params(), net.params()); Map params1 = net.paramTable(); Map params2 = net2.paramTable(); assertEquals(msg, params2, params1); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java 
b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java index 2d1dd9bf34ee..ce9f4500b2d6 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java @@ -64,9 +64,9 @@ public void testSameDiffDenseForward() { Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, -// Activation.CUBE, //https://github.com/deeplearning4j/nd4j/issues/2426 + Activation.CUBE, Activation.HARDTANH, -// Activation.RELU //JVM crash + Activation.RELU }; for (Activation a : afns) { diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java index 29d0ede36a06..39abd04e76ca 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java @@ -83,32 +83,12 @@ public void defineParameters(SDLayerParams params) { params.clear(); int[] weightsShape = new int[]{nOut, nIn, kernel[0], kernel[1]}; params.addWeightParam(ConvolutionParamInitializer.WEIGHT_KEY, weightsShape); - int[] biasShape = new int[]{1, nOut}; - params.addBiasParam(ConvolutionParamInitializer.BIAS_KEY, biasShape); + if(hasBias) { + int[] biasShape = new int[]{1, nOut}; + params.addBiasParam(ConvolutionParamInitializer.BIAS_KEY, biasShape); + } } -// @Override -// public char paramReshapeOrder(String param) { -// //To match DL4J -// return 'c'; -// } - -// @Override -// public Map paramShapes() { -// if (paramShapes == null) { -// int[] weightsShape = new int[]{nOut, nIn, kernel[0], kernel[1]}; -// Map m = new HashMap<>(); -// m.put(ConvolutionParamInitializer.WEIGHT_KEY, weightsShape); -// if(hasBias) { -// int[] biasShape = new int[]{1, nOut}; -// m.put(ConvolutionParamInitializer.BIAS_KEY, biasShape); -// } -// paramShapes = m; -// } -// return paramShapes; -// } - - @Override public void initializeParameters(Map params) { for(Map.Entry e : params.entrySet()){ diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java index 84f0697a2563..40ba53b8d033 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java @@ -100,6 +100,11 @@ public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig } } + public char paramReshapeOrder(String param){ + //To match DL4J + return 'f'; + } + public static class Builder extends BaseSameDiffLayer.Builder { private int nIn; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java index 7ea684361e00..ca75fd1e7a5f 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java @@ -50,6 +50,7 @@ import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.config.Sgd; import 
org.nd4j.linalg.lossfunctions.ILossFunction; +import org.nd4j.shade.jackson.annotation.JsonAutoDetect; import org.nd4j.shade.jackson.databind.*; import org.nd4j.shade.jackson.databind.deser.BeanDeserializerModifier; import org.nd4j.shade.jackson.databind.introspect.AnnotatedClass; @@ -448,6 +449,12 @@ private static void configureMapper(ObjectMapper ret) { ret.configure(MapperFeature.SORT_PROPERTIES_ALPHABETICALLY, true); ret.enable(SerializationFeature.INDENT_OUTPUT); + ret.setVisibilityChecker(ret.getSerializationConfig().getDefaultVisibilityChecker() + .withFieldVisibility(JsonAutoDetect.Visibility.ANY) + .withGetterVisibility(JsonAutoDetect.Visibility.NONE) + .withSetterVisibility(JsonAutoDetect.Visibility.NONE) + .withCreatorVisibility(JsonAutoDetect.Visibility.NONE)); + SimpleModule customDeserializerModule = new SimpleModule(); customDeserializerModule.setDeserializerModifier(new BeanDeserializerModifier() { @Override diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java index a47f5640ea73..b6151c958d30 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java @@ -7,19 +7,13 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Layer; -import org.deeplearning4j.nn.conf.layers.samediff.impl.DefaultSDLayerParams; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; -import org.deeplearning4j.nn.layers.samediff.SameDiffLayer; import org.deeplearning4j.nn.params.SameDiffParamInitializer; import org.deeplearning4j.optimize.api.IterationListener; -import org.nd4j.autodiff.samediff.SDVariable; -import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.config.IUpdater; -import java.util.ArrayList; import java.util.Collection; -import java.util.List; import java.util.Map; @Data @@ -43,15 +37,20 @@ protected AbstractSameDiffLayer(Builder builder){ this.l2Bias = builder.l2Bias; this.updater = builder.updater; this.biasUpdater = builder.biasUpdater; - - layerParams = new DefaultSDLayerParams(); - defineParameters(layerParams); } protected AbstractSameDiffLayer(){ //No op constructor for Jackson } + public SDLayerParams getLayerParams(){ + if(layerParams == null){ + layerParams = new SDLayerParams(); + defineParameters(layerParams); + } + return layerParams; + } + @Override public abstract InputType getOutputType(int layerIndex, InputType inputType); @@ -61,12 +60,6 @@ protected AbstractSameDiffLayer(){ @Override public abstract InputPreProcessor getPreProcessorForInputType(InputType inputType); -// public abstract List weightKeys(); -// -// public abstract List biasKeys(); -// -// public abstract Map paramShapes(); - public abstract void defineParameters(SDLayerParams params); public abstract void initializeParameters(Map params); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java index e00ae60a51a8..b23edc7e5fb7 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java +++ 
b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java @@ -1,25 +1,111 @@ package org.deeplearning4j.nn.conf.layers.samediff; -import org.nd4j.shade.jackson.annotation.JsonSubTypes; +import lombok.NoArgsConstructor; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.shade.jackson.annotation.JsonIgnore; +import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; +import org.nd4j.shade.jackson.annotation.JsonProperty; import org.nd4j.shade.jackson.annotation.JsonTypeInfo; -import java.util.List; -import java.util.Map; +import java.util.*; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") -public interface SDLayerParams { +@JsonIgnoreProperties({"paramsList", "weightParamsList", "biasParamsList"}) +@NoArgsConstructor +public class SDLayerParams { - void addWeightParam(String paramKey, int[] paramShape); + private Map weightParams = new LinkedHashMap<>(); + private Map biasParams = new LinkedHashMap<>(); - void addBiasParam(String paramKey, int[] paramShape); + @JsonIgnore private List paramsList; + @JsonIgnore private List weightParamsList; + @JsonIgnore private List biasParamsList; - List getParameterKeys(); + public SDLayerParams(@JsonProperty("weightParams") Map weightParams, + @JsonProperty("biasParams") Map biasParams){ + this.weightParams = weightParams; + this.biasParams = biasParams; + } - List getWeightParameterKeys(); + public void addWeightParam(String paramKey, int[] paramShape) { + weightParams.put(paramKey, paramShape); + paramsList = null; + weightParamsList = null; + biasParamsList = null; + } - List getBiasParameterKeys(); + public void addBiasParam(String paramKey, int[] paramShape) { + biasParams.put(paramKey, paramShape); + paramsList = null; + weightParamsList = null; + biasParamsList = null; + } - Map getParamShapes(); + public List getParameterKeys() { + if(paramsList == null) { + List out = new ArrayList<>(); + out.addAll(getWeightParameterKeys()); + out.addAll(getBiasParameterKeys()); + this.paramsList = Collections.unmodifiableList(out); + } + return paramsList; + } - void clear(); + public List getWeightParameterKeys() { + if(weightParamsList == null){ + weightParamsList = Collections.unmodifiableList(new ArrayList<>(weightParams.keySet())); + } + return weightParamsList; + } + + public List getBiasParameterKeys() { + if(biasParamsList == null){ + biasParamsList = Collections.unmodifiableList(new ArrayList<>(biasParams.keySet())); + } + return biasParamsList; + } + + public Map getParamShapes() { + Map map = new LinkedHashMap<>(); + map.putAll(weightParams); + map.putAll(biasParams); + return map; + } + + public void clear() { + weightParams.clear(); + biasParams.clear(); + paramsList = null; + weightParamsList = null; + biasParamsList = null; + } + + public boolean equals(Object o) { + if(!(o instanceof SDLayerParams)){ + return false; + } + SDLayerParams s = (SDLayerParams)o; + return equals(weightParams, s.weightParams) && equals(biasParams, s.biasParams); + } + + private static boolean equals(Map first, Map second){ + //Helper method - Lombok equals method seems to have trouble with arrays... 
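// Java arrays compare by reference, so Map.equals on Map<String,int[]> (and the equals Lombok
// would generate from these fields) reports false for distinct but identical shape arrays; hence
// the explicit key-set check plus a per-entry Arrays.equals below.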
+ if(!first.keySet().equals(second.keySet())){ + return false; + } + for(Map.Entry e : first.entrySet()){ + if(!Arrays.equals(e.getValue(), second.get(e.getKey()))){ + return false; + } + } + return true; + } + + public int hashCode() { + return weightParams.hashCode() ^ biasParams.hashCode(); + } + + protected boolean canEqual(Object other) { + return other instanceof SDLayerParams; + } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/impl/DefaultSDLayerParams.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/impl/DefaultSDLayerParams.java deleted file mode 100644 index a4bfdacad3be..000000000000 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/impl/DefaultSDLayerParams.java +++ /dev/null @@ -1,79 +0,0 @@ -package org.deeplearning4j.nn.conf.layers.samediff.impl; - -import lombok.EqualsAndHashCode; -import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams; -import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; - -import java.util.*; - -@JsonIgnoreProperties({"paramsList", "weightParamsList", "biasParamsList"}) -@EqualsAndHashCode(exclude = {"paramsList", "weightParamsList", "biasParamsList"}) -public class DefaultSDLayerParams implements SDLayerParams { - - private Map weightParams = new LinkedHashMap<>(); - private Map biasParams = new LinkedHashMap<>(); - - private List paramsList; - private List weightParamsList; - private List biasParamsList; - - @Override - public void addWeightParam(String paramKey, int[] paramShape) { - weightParams.put(paramKey, paramShape); - paramsList = null; - weightParams = null; - biasParams = null; - } - - @Override - public void addBiasParam(String paramKey, int[] paramShape) { - biasParams.put(paramKey, paramShape); - paramsList = null; - weightParams = null; - biasParams = null; - } - - @Override - public List getParameterKeys() { - if(paramsList == null) { - List out = new ArrayList<>(); - out.addAll(getWeightParameterKeys()); - out.addAll(getBiasParameterKeys()); - this.paramsList = Collections.unmodifiableList(out); - } - return paramsList; - } - - @Override - public List getWeightParameterKeys() { - if(weightParamsList == null){ - weightParamsList = Collections.unmodifiableList(new ArrayList<>(weightParams.keySet())); - } - return weightParamsList; - } - - @Override - public List getBiasParameterKeys() { - if(biasParamsList == null){ - biasParamsList = Collections.unmodifiableList(new ArrayList<>(biasParams.keySet())); - } - return biasParamsList; - } - - @Override - public Map getParamShapes() { - Map map = new LinkedHashMap<>(); - map.putAll(weightParams); - map.putAll(biasParams); - return map; - } - - @Override - public void clear() { - weightParams.clear(); - biasParams.clear(); - paramsList = null; - weightParamsList = null; - biasParamsList = null; - } -} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java index adcbc4aca68b..fa39dc980132 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java @@ -199,9 +199,9 @@ protected void doInit(){ int[] inputShape = input.shape().clone(); // inputShape[0] = -1; //TODO THIS DOESN'T ENABLE VARIABLE SIZE MINIBATCHES SDVariable inputVar = sameDiff.var(INPUT_KEY, inputShape); - Map paramShapes = 
layerConf().paramShapes(); + Map paramShapes = layerConf().getLayerParams().getParamShapes(); Map params = new LinkedHashMap<>(); - for(String s : layerConf().paramKeys()){ + for(String s : paramShapes.keySet()){ int[] ps = paramShapes.get(s); SDVariable v = sameDiff.var(s, ps); params.put(s, v); From 79af7a8eeaccd589692b03d8ea4bf2a873daf584 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Tue, 20 Feb 2018 14:44:19 +1100 Subject: [PATCH 31/34] API tweaks, add MinimalSameDiffDense example --- .../testlayers/MinimalSameDiffDense.java | 69 +++++++++++++++++++ .../samediff/testlayers/SameDiffConv.java | 8 +-- .../samediff/testlayers/SameDiffDense.java | 6 +- .../samediff/AbstractSameDiffLayer.java | 6 ++ .../layers/samediff/BaseSameDiffLayer.java | 18 ++++- .../nn/layers/samediff/SameDiffLayer.java | 11 +-- 6 files changed, 104 insertions(+), 14 deletions(-) create mode 100644 deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/MinimalSameDiffDense.java diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/MinimalSameDiffDense.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/MinimalSameDiffDense.java new file mode 100644 index 000000000000..fa73d1824341 --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/MinimalSameDiffDense.java @@ -0,0 +1,69 @@ +package org.deeplearning4j.nn.layers.samediff.testlayers; + +import lombok.Data; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.samediff.BaseSameDiffLayer; +import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams; +import org.deeplearning4j.nn.params.DefaultParamInitializer; +import org.deeplearning4j.nn.weights.WeightInit; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.ndarray.INDArray; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +@Data +public class MinimalSameDiffDense extends BaseSameDiffLayer { + + private int nIn; + private int nOut; + private Activation activation; + + public MinimalSameDiffDense(int nIn, int nOut, Activation activation, WeightInit weightInit){ + this.nIn = nIn; + this.nOut = nOut; + this.activation = activation; + this.weightInit = weightInit; + } + + protected MinimalSameDiffDense(){ + //For JSON serialization + } + + @Override + public List defineLayer(SameDiff sd, SDVariable layerInput, Map paramTable) { + SDVariable weights = paramTable.get(DefaultParamInitializer.WEIGHT_KEY); + SDVariable bias = paramTable.get(DefaultParamInitializer.BIAS_KEY); + + SDVariable mmul = sd.mmul("mmul", layerInput, weights); + SDVariable z = mmul.add("z", bias); + return Collections.singletonList(activation.asSameDiff("out", sd, z)); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + return InputType.feedForward(nOut); + } + + @Override + public void defineParameters(SDLayerParams params) { + params.addWeightParam(DefaultParamInitializer.WEIGHT_KEY, new int[]{nIn, nOut}); + params.addBiasParam(DefaultParamInitializer.BIAS_KEY, new int[]{1, nOut}); + } + + @Override + public void initializeParameters(Map params) { + params.get(DefaultParamInitializer.BIAS_KEY).assign(0); + initWeights(nIn, nOut, weightInit, 
params.get(DefaultParamInitializer.WEIGHT_KEY)); + } + + //OPTIONAL methods: +// public void setNIn(InputType inputType, boolean override) +// public InputPreProcessor getPreProcessorForInputType(InputType inputType) +// public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) +} diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java index 39abd04e76ca..b0e63036d48d 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java @@ -22,7 +22,7 @@ import java.util.*; @Data -@EqualsAndHashCode(callSuper = true, exclude = {"paramShapes"}) +@EqualsAndHashCode(callSuper = true) @JsonIgnoreProperties({"paramShapes"}) public class SameDiffConv extends BaseSameDiffLayer { @@ -103,7 +103,7 @@ public void initializeParameters(Map params) { } @Override - public List defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable) { + public List defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable) { SDVariable w = paramTable.get(ConvolutionParamInitializer.WEIGHT_KEY); @@ -125,9 +125,7 @@ public List defineLayer(SameDiff sameDiff, SDVariable layerInput, Map params){ } @Override - public List defineLayer(SameDiff sd, SDVariable layerInput, Map paramTable) { + public List defineLayer(SameDiff sd, SDVariable layerInput, Map paramTable) { SDVariable weights = paramTable.get(DefaultParamInitializer.WEIGHT_KEY); SDVariable bias = paramTable.get(DefaultParamInitializer.BIAS_KEY); SDVariable mmul = sd.mmul("mmul", layerInput, weights); SDVariable z = mmul.add("z", bias); - SDVariable out = activation.asSameDiff("out", sd, z); - - return Collections.singletonList("out"); + return Collections.singletonList(activation.asSameDiff("out", sd, z)); } @Override diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java index b6151c958d30..32bb39f6d71f 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java @@ -9,6 +9,8 @@ import org.deeplearning4j.nn.conf.layers.Layer; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.params.SameDiffParamInitializer; +import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.weights.WeightInitUtil; import org.deeplearning4j.optimize.api.IterationListener; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.config.IUpdater; @@ -111,6 +113,10 @@ public char paramReshapeOrder(String param){ return 'c'; } + protected void initWeights(int fanIn, int fanOut, WeightInit weightInit, INDArray array){ + WeightInitUtil.initWeights(fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array); + } + public void applyGlobalConfig(NeuralNetConfiguration.Builder b){ if(Double.isNaN(l1)){ l1 = b.getL1(); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java index 
db22189115da..700fa330a428 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java @@ -37,7 +37,23 @@ protected BaseSameDiffLayer(){ //No op constructor for Jackson } - public abstract List defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable); + public abstract List defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable); + + @Override + public void setNIn(InputType inputType, boolean override) { + //Default implementation: no-op + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + //Default implementation: no-op + return null; + } + + @Override + public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) { + //Default implementation: no op + } //================================================================================================================== diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java index fa39dc980132..c5a3fffe78c0 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java @@ -206,15 +206,18 @@ protected void doInit(){ SDVariable v = sameDiff.var(s, ps); params.put(s, v); } - List outputKeys = bl.defineLayer(sameDiff, inputVar, params); - if(outputKeys == null || outputKeys.size() != 1){ - throw new IllegalStateException("Invalid output keys: " + outputKeys); + List layerOutputs = bl.defineLayer(sameDiff, inputVar, params); + if(layerOutputs == null || layerOutputs.size() != 1){ + throw new IllegalStateException("Invalid outputs: " + layerOutputs); } for(Map.Entry e : p.entrySet()){ sameDiff.associateArrayWithVariable(e.getValue(), sameDiff.getVariable(e.getKey())); } - this.outputKeys = outputKeys; + this.outputKeys = new ArrayList<>(); + for(SDVariable sdv : layerOutputs){ + outputKeys.add(sdv.getVarName()); + } } } From 9e0ea03cbe7b513158a293e9f5f5c83ea922be1a Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 21 Feb 2018 13:17:54 +1100 Subject: [PATCH 32/34] Javadoc and cleanup --- .../samediff/AbstractSameDiffLayer.java | 19 ++++ .../layers/samediff/BaseSameDiffLayer.java | 26 ++++++ .../layers/samediff/NoParamSameDiffLayer.java | 92 ------------------- .../conf/layers/samediff/SDLayerParams.java | 56 +++++++++-- 4 files changed, 94 insertions(+), 99 deletions(-) delete mode 100644 deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/NoParamSameDiffLayer.java diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java index 32bb39f6d71f..5e74961f49c0 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java @@ -62,10 +62,23 @@ public SDLayerParams getLayerParams(){ @Override public abstract InputPreProcessor getPreProcessorForInputType(InputType inputType); + /** + * Define the parameters for the network. 
Use {@link SDLayerParams#addWeightParam(String, int...)} and + * {@link SDLayerParams#addBiasParam(String, int[])} + * @param params Object used to set parameters for this layer + */ public abstract void defineParameters(SDLayerParams params); + /** + * Set the initial parameter values for this layer, if required + * @param params Parameter arrays that may be initialized + */ public abstract void initializeParameters(Map params); + /** + * Apply the global configuration (weight init, activation function, etc) to this layer + * @param globalConfig Global configuration + */ public abstract void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig); @Override @@ -109,6 +122,12 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { return new LayerMemoryReport(); //TODO } + /** + * Returns the memory layout ('c' or 'f' order - i.e., row/column major) of the parameters. In most cases, + * this can/should be left + * @param param Name of the parameter + * @return Memory layout ('c' or 'f') of the parameter + */ public char paramReshapeOrder(String param){ return 'c'; } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java index 700fa330a428..46460bcb3494 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/BaseSameDiffLayer.java @@ -22,6 +22,29 @@ import java.util.List; import java.util.Map; +/** + * A base layer used for implementing Deeplearning4j layers using SameDiff. These layers are not scoring/output layers: + * that is, they should be used as the intermediate layer in a network only. Deeplearning4j SameDiff output layers will + * be added at a later date.
+ * NOTE: At present, only forward pass is supported. Backward pass will be added at a future date.
+ *
+ * To implement a Deeplearning4j layer using SameDiff, extend this class.<br>
+ * There are 4 required methods:
+ * - defineLayer: Defines the forward pass for the layer
+ * - defineParameters: Define the layer's parameters in a way suitable for DL4J
+ * - initializeParameters: if required, set the initial parameter values for the layer
+ * - getOutputType: determine the type of output/activations for the layer (without actually executing the layer's + * forward pass)
+ *
+ * Furthermore, there are 3 optional methods:
+ * - setNIn(InputType inputType, boolean override): if implemented, set the number of inputs to the layer during network + * initialization
+ * - getPreProcessorForInputType: return the preprocessor that should be added (if any), for the given input type
+ * - applyGlobalConfigToLayer: apply any global configuration options (weight init, activation functions etc) to the + * layer's configuration.
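+ * <br>
+ * As a rough sketch only (see MinimalSameDiffDense in the tests, added in this patch series, for the complete
+ * example), defineLayer for a minimal dense layer could look as follows, where "W" and "b" are hypothetical parameter
+ * keys assumed to have been registered in defineParameters:<br>
+ * <pre>
+ * SDVariable w = paramTable.get("W");
+ * SDVariable b = paramTable.get("b");
+ * SDVariable z = sameDiff.mmul("mmul", layerInput, w).add("z", b);
+ * return Collections.singletonList(sameDiff.sigmoid("out", z));
+ * </pre>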
+ * + * @author Alex Black + */ @Data @EqualsAndHashCode(callSuper = true) public abstract class BaseSameDiffLayer extends AbstractSameDiffLayer { @@ -74,6 +97,9 @@ public static abstract class Builder> extends AbstractSameD protected WeightInit weightInit = WeightInit.XAVIER; + /** + * @param weightInit Weight initialization to use for the layer + */ public T weightInit(WeightInit weightInit){ this.weightInit = weightInit; return (T)this; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/NoParamSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/NoParamSameDiffLayer.java deleted file mode 100644 index 4b0bffc60cb9..000000000000 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/NoParamSameDiffLayer.java +++ /dev/null @@ -1,92 +0,0 @@ -package org.deeplearning4j.nn.conf.layers.samediff; - -import lombok.Data; -import lombok.EqualsAndHashCode; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.Layer; -import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; -import org.deeplearning4j.nn.params.EmptyParamInitializer; -import org.deeplearning4j.nn.params.SameDiffParamInitializer; -import org.deeplearning4j.optimize.api.IterationListener; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.learning.config.IUpdater; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Map; - -@Data -@EqualsAndHashCode(callSuper = true) -public abstract class NoParamSameDiffLayer extends Layer { - - protected NoParamSameDiffLayer(Builder builder){ - super(builder); - } - - protected NoParamSameDiffLayer(){ - //No op constructor for Jackson - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType){ - return inputType; - } - - @Override - public void setNIn(InputType inputType, boolean override){ - //No op - } - - @Override - public abstract InputPreProcessor getPreProcessorForInputType(InputType inputType); - - public abstract void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig); - - @Override - public abstract org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams); - - //================================================================================================================== - - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); - } - - @Override - public double getL1ByParam(String paramName) { - return 0.0; //No params - } - - @Override - public double getL2ByParam(String paramName) { - return 0.0; //No params - } - - @Override - public IUpdater getUpdaterByParam(String paramName){ - throw new UnsupportedOperationException("No parameters for this layer"); - } - - @Override - public boolean isPretrainParam(String paramName) { - return false; - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - return new LayerMemoryReport(); //TODO - } - - public void applyGlobalConfig(NeuralNetConfiguration.Builder b){ - applyGlobalConfigToLayer(b); - } - - public static abstract class Builder> extends Layer.Builder { - - } -} diff --git 
a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java index b23edc7e5fb7..2afcc641b8ff 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java @@ -1,6 +1,8 @@ package org.deeplearning4j.nn.conf.layers.samediff; +import com.google.common.base.Preconditions; import lombok.NoArgsConstructor; +import lombok.NonNull; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.shade.jackson.annotation.JsonIgnore; import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; @@ -9,7 +11,11 @@ import java.util.*; -@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") +/** + * SDLayerParams is used to define the parameters for a Deeplearning4j SameDiff layer + * + * @author Alex Black + */ @JsonIgnoreProperties({"paramsList", "weightParamsList", "biasParamsList"}) @NoArgsConstructor public class SDLayerParams { @@ -27,20 +33,42 @@ public SDLayerParams(@JsonProperty("weightParams") Map weightParam this.biasParams = biasParams; } - public void addWeightParam(String paramKey, int[] paramShape) { + /** + * Add a weight parameter to the layer, with the specified shape. For example, a standard fully connected layer + * could have weight parameters with shape [numInputs, layerSize] + * + * @param paramKey The parameter key (name) for the weight parameter + * @param paramShape Shape of the weight parameter array + */ + public void addWeightParam(@NonNull String paramKey, @NonNull int... paramShape) { + Preconditions.checkArgument(paramShape.length > 0, "Provided weight parameter shape is" + + " invalid: length 0 provided for shape. Parameter: " + paramKey); weightParams.put(paramKey, paramShape); paramsList = null; weightParamsList = null; biasParamsList = null; } - public void addBiasParam(String paramKey, int[] paramShape) { + /** + * Add a bias parameter to the layer, with the specified shape. For example, a standard fully connected layer + * could have bias parameters with shape [1, layerSize] + * + * @param paramKey The parameter key (name) for the bias parameter + * @param paramShape Shape of the bias parameter array + */ + public void addBiasParam(@NonNull String paramKey, @NonNull int[] paramShape) { + Preconditions.checkArgument(paramShape.length > 0, "Provided bias parameter shape is" + + " invalid: length 0 provided for shape. 
Parameter: " + paramKey); biasParams.put(paramKey, paramShape); paramsList = null; weightParamsList = null; biasParamsList = null; } + /** + * @return Get a list of parameter names / keys (previously added via {@link #addWeightParam(String, int...)} and + * {@link #addBiasParam(String, int[])}) + */ public List getParameterKeys() { if(paramsList == null) { List out = new ArrayList<>(); @@ -51,6 +79,10 @@ public List getParameterKeys() { return paramsList; } + /** + * @return Get a list of parameter names / keys for weight parameters only, previously added via + * {@link #addWeightParam(String, int...)} + */ public List getWeightParameterKeys() { if(weightParamsList == null){ weightParamsList = Collections.unmodifiableList(new ArrayList<>(weightParams.keySet())); @@ -58,6 +90,10 @@ public List getWeightParameterKeys() { return weightParamsList; } + /** + * @return Get a list of parameter names / keys for bias parameters only, previously added via + * {@link #addBiasParam(String, int[])} + */ public List getBiasParameterKeys() { if(biasParamsList == null){ biasParamsList = Collections.unmodifiableList(new ArrayList<>(biasParams.keySet())); @@ -65,6 +101,11 @@ public List getBiasParameterKeys() { return biasParamsList; } + /** + * Get the parameter shapes for all parameters + * + * @return Map of parameter shapes, by parameter + */ public Map getParamShapes() { Map map = new LinkedHashMap<>(); map.putAll(weightParams); @@ -72,6 +113,9 @@ public Map getParamShapes() { return map; } + /** + * Clear any previously set weight/bias parameters (including their shapes) + */ public void clear() { weightParams.clear(); biasParams.clear(); @@ -80,6 +124,7 @@ public void clear() { biasParamsList = null; } + @Override public boolean equals(Object o) { if(!(o instanceof SDLayerParams)){ return false; @@ -101,11 +146,8 @@ private static boolean equals(Map first, Map second) return true; } + @Override public int hashCode() { return weightParams.hashCode() ^ biasParams.hashCode(); } - - protected boolean canEqual(Object other) { - return other instanceof SDLayerParams; - } } From 75b49bc8aaccc3745f1124412784c37186fec0f9 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 21 Feb 2018 13:27:02 +1100 Subject: [PATCH 33/34] Add check for existence of no-arg constructor for better UX --- .../layers/samediff/AbstractSameDiffLayer.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java index 5e74961f49c0..138145c8de57 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java @@ -2,6 +2,7 @@ import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -18,6 +19,7 @@ import java.util.Collection; import java.util.Map; +@Slf4j @Data @EqualsAndHashCode(callSuper = true) public abstract class AbstractSameDiffLayer extends Layer { @@ -39,6 +41,19 @@ protected AbstractSameDiffLayer(Builder builder){ this.l2Bias = builder.l2Bias; this.updater = builder.updater; this.biasUpdater = builder.biasUpdater; + + //Check that this class has a no-arg constructor 
for JSON: better to detect this now and throw an actually + //useful exception, rather than have it fail for users with a difficult to understand message + try{ + getClass().getDeclaredConstructor(); + } catch (NoSuchMethodException e){ + log.warn("***SameDiff layer {} does not have a zero argument (no-arg) constructor.***\nA no-arg constructor " + + "is required for JSON deserialization, which is used for both model saving and distributed (Spark) " + + "training.\nA no-arg constructor (private, protected or public) as well as setters (or simply a " + + "Lombok @Data annotation) should be added to avoid JSON errors later.", getClass().getName()); + } catch (SecurityException e){ + //Ignore + } } protected AbstractSameDiffLayer(){ From ff87f8496b84a19c35493e031a75319cd2dd97b4 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 21 Feb 2018 14:23:02 +1100 Subject: [PATCH 34/34] Remove java 1.8 from pom --- deeplearning4j-core/pom.xml | 10 ---------- .../nn/conf/layers/samediff/AbstractSameDiffLayer.java | 4 ++-- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/deeplearning4j-core/pom.xml b/deeplearning4j-core/pom.xml index ffefafda17d3..7ffe676c4ceb 100644 --- a/deeplearning4j-core/pom.xml +++ b/deeplearning4j-core/pom.xml @@ -36,16 +36,6 @@ - - - org.apache.maven.plugins - maven-compiler-plugin - - 1.8 - 1.8 - - - diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java index 138145c8de57..9d3b3a881d7b 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java @@ -42,8 +42,8 @@ protected AbstractSameDiffLayer(Builder builder){ this.updater = builder.updater; this.biasUpdater = builder.biasUpdater; - //Check that this class has a no-arg constructor for JSON: better to detect this now and throw an actually - //useful exception, rather than have it fail for users with a difficult to understand message + //Check that this class has a no-arg constructor for JSON: better to detect this now and provide useful information + // to pre-empt a failure later for users, which would have a more difficult to understand message try{ getClass().getDeclaredConstructor(); } catch (NoSuchMethodException e){