diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java index aeb734c10d25..9888fe040a8c 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java @@ -9,6 +9,7 @@ import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.conf.layers.convolutional.Cropping3D; import org.deeplearning4j.nn.conf.preprocessor.Cnn3DToFeedForwardPreProcessor; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; @@ -47,7 +48,7 @@ public void testCnn3DPlain() { int[] widths = {6}; - int[] minibatchSizes = {2, 3}; + int[] minibatchSizes = {3}; int convNIn = 2; int convNOut1 = 3; int convNOut2 = 4; @@ -55,10 +56,10 @@ public void testCnn3DPlain() { int finalNOut = 42; - int[][] kernels = {{1, 1, 1}, {2, 2, 2}}; - int[][] strides = {{1, 1, 1}, {2, 2, 2}}; + int[][] kernels = {{2, 2, 2}}; + int[][] strides = {{1, 1, 1}}; - Activation[] activations = {Activation.RELU}; + Activation[] activations = {Activation.SIGMOID}; ConvolutionMode[] modes = {ConvolutionMode.Truncate, ConvolutionMode.Same}; @@ -72,7 +73,7 @@ public void testCnn3DPlain() { for (int[] stride : strides) { int outDepth = mode == ConvolutionMode.Same ? - depth / stride[0] :(depth - kernel[0]) / stride[0] + 1; + depth / stride[0] : (depth - kernel[0]) / stride[0] + 1; int outHeight = mode == ConvolutionMode.Same ? height / stride[1] : (height - kernel[1]) / stride[1] + 1; int outWidth = mode == ConvolutionMode.Same ? @@ -101,8 +102,8 @@ public void testCnn3DPlain() { .activation(Activation.SOFTMAX).nOut(finalNOut).build()) .inputPreProcessor(2, new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, - convNOut2, true)) - .setInputType(InputType.convolutional3D(height, width, depth, convNIn)).build(); + convNOut2, true)) + .setInputType(InputType.convolutional3D(depth, height, width, convNIn)).build(); String json = conf.toJson(); MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); @@ -141,5 +142,375 @@ public void testCnn3DPlain() { } } + @Test + public void testCnn3DZeroPadding() { + Nd4j.getRandom().setSeed(42); + + int depth = 4; + int height = 4; + int width = 4; + + + int[] minibatchSizes = {3}; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int denseNOut = 5; + int finalNOut = 42; + + + int[] kernel = {2, 2, 2}; + int[] zeroPadding = {1, 1, 2, 2, 3, 3}; + + Activation[] activations = {Activation.SIGMOID}; + + ConvolutionMode[] modes = {ConvolutionMode.Truncate, ConvolutionMode.Same}; + + for (Activation afn : activations) { + for (int miniBatchSize : minibatchSizes) { + for (ConvolutionMode mode : modes) { + + int outDepth = mode == ConvolutionMode.Same ? + depth : (depth - kernel[0]) + 1; + int outHeight = mode == ConvolutionMode.Same ? + height : (height - kernel[1]) + 1; + int outWidth = mode == ConvolutionMode.Same ? 
+ width : (width - kernel[2]) + 1; + + outDepth += zeroPadding[0] + zeroPadding[1]; + outHeight += zeroPadding[2] + zeroPadding[3]; + outWidth += zeroPadding[4] + zeroPadding[5]; + + INDArray input = Nd4j.rand(new int[]{miniBatchSize, convNIn, depth, height, width}); + INDArray labels = Nd4j.zeros(miniBatchSize, finalNOut); + for (int i = 0; i < miniBatchSize; i++) { + labels.putScalar(new int[]{i, i % finalNOut}, 1.0); + } + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()).weightInit(WeightInit.LECUN_NORMAL) + .dist(new NormalDistribution(0, 1)) + .list() + .layer(0, new Convolution3D.Builder().activation(afn).kernelSize(kernel) + .nIn(convNIn).nOut(convNOut1).hasBias(false) + .convolutionMode(mode).dataFormat(Convolution3D.DataFormat.NCDHW) + .build()) + .layer(1, new Convolution3D.Builder().activation(afn).kernelSize(1, 1, 1) + .nIn(convNOut1).nOut(convNOut2).hasBias(false) + .convolutionMode(mode).dataFormat(Convolution3D.DataFormat.NCDHW) + .build()) + .layer(2, new ZeroPadding3DLayer.Builder(zeroPadding).build()) + .layer(3, new DenseLayer.Builder().nOut(denseNOut).build()) + .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nOut(finalNOut).build()) + .inputPreProcessor(3, + new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, + convNOut2, true)) + .setInputType(InputType.convolutional3D(depth, height, width, convNIn)).build(); + + String json = conf.toJson(); + MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = "Minibatch size = " + miniBatchSize + ", activationFn=" + afn + + ", kernel = " + Arrays.toString(kernel) + ", mode = " + mode.toString() + + ", input depth " + depth + ", input height " + height + + ", input width " + width; + + if (PRINT_RESULTS) { + log.info(msg); + for (int j = 0; j < net.getnLayers(); j++) { + log.info("Layer " + j + " # params: " + net.getLayer(j).numParams()); + } + } + + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, input, labels); + + assertTrue(msg, gradOK); + + TestUtils.testModelSerialization(net); + } + + } + } + } + + + @Test + public void testCnn3DPooling() { + Nd4j.getRandom().setSeed(42); + + int depth = 4; + int height = 4; + int width = 4; + + + int[] minibatchSizes = {3}; + int convNIn = 2; + int convNOut = 4; + int denseNOut = 5; + int finalNOut = 42; + + int[] kernel = {2, 2, 2}; + + Activation[] activations = {Activation.SIGMOID}; + + Subsampling3DLayer.PoolingType[] poolModes = {Subsampling3DLayer.PoolingType.AVG}; + + ConvolutionMode[] modes = {ConvolutionMode.Truncate}; + + for (Activation afn : activations) { + for (int miniBatchSize : minibatchSizes) { + for (Subsampling3DLayer.PoolingType pool : poolModes) { + for (ConvolutionMode mode : modes) { + + int outDepth = depth / kernel[0]; + int outHeight = height / kernel[1]; + int outWidth = width / kernel[2]; + + INDArray input = Nd4j.rand(new int[]{miniBatchSize, convNIn, depth, height, width}); + INDArray labels = Nd4j.zeros(miniBatchSize, finalNOut); + for (int i = 0; i < miniBatchSize; i++) { + labels.putScalar(new int[]{i, i % finalNOut}, 1.0); + } + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()) + .weightInit(WeightInit.XAVIER) + .dist(new NormalDistribution(0, 1)) + .list() + .layer(0, 
new Convolution3D.Builder().activation(afn).kernelSize(1, 1, 1) + .nIn(convNIn).nOut(convNOut).hasBias(false) + .convolutionMode(mode).dataFormat(Convolution3D.DataFormat.NCDHW) + .build()) + .layer(1, new Subsampling3DLayer.Builder(kernel) + .poolingType(pool).convolutionMode(mode).build()) + .layer(2, new DenseLayer.Builder().nOut(denseNOut).build()) + .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nOut(finalNOut).build()) + .inputPreProcessor(2, + new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, + convNOut, true)) + .setInputType(InputType.convolutional3D(depth, height, width, convNIn)).build(); + + String json = conf.toJson(); + MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = "Minibatch size = " + miniBatchSize + ", activationFn=" + afn + + ", kernel = " + Arrays.toString(kernel) + ", mode = " + mode.toString() + + ", input depth " + depth + ", input height " + height + + ", input width " + width; + + if (PRINT_RESULTS) { + log.info(msg); + for (int j = 0; j < net.getnLayers(); j++) { + log.info("Layer " + j + " # params: " + net.getLayer(j).numParams()); + } + } + + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, input, labels); + + assertTrue(msg, gradOK); + + TestUtils.testModelSerialization(net); + } + } + } + } + } + + @Test + public void testCnn3DUpsampling() { + Nd4j.getRandom().setSeed(42); + + int depth = 2; + int height = 2; + int width = 2; + + + int[] minibatchSizes = {3}; + int convNIn = 2; + int convNOut = 4; + int denseNOut = 5; + int finalNOut = 42; + + + int[] upsamplingSize = {2, 2, 2}; + + Activation[] activations = {Activation.SIGMOID}; + + + ConvolutionMode[] modes = {ConvolutionMode.Truncate}; + + for (Activation afn : activations) { + for (int miniBatchSize : minibatchSizes) { + for (ConvolutionMode mode : modes) { + + int outDepth = depth * upsamplingSize[0]; + int outHeight = height * upsamplingSize[1]; + int outWidth = width * upsamplingSize[2]; + + INDArray input = Nd4j.rand(new int[]{miniBatchSize, convNIn, depth, height, width}); + INDArray labels = Nd4j.zeros(miniBatchSize, finalNOut); + for (int i = 0; i < miniBatchSize; i++) { + labels.putScalar(new int[]{i, i % finalNOut}, 1.0); + } + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()).weightInit(WeightInit.LECUN_NORMAL) + .dist(new NormalDistribution(0, 1)) + .list() + .layer(0, new Convolution3D.Builder().activation(afn).kernelSize(1, 1, 1) + .nIn(convNIn).nOut(convNOut).hasBias(false) + .convolutionMode(mode).dataFormat(Convolution3D.DataFormat.NCDHW) + .build()) + .layer(1, new Upsampling3D.Builder(upsamplingSize[0]).build()) + .layer(2, new DenseLayer.Builder().nOut(denseNOut).build()) + .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nOut(finalNOut).build()) + .inputPreProcessor(2, + new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, + convNOut, true)) + .setInputType(InputType.convolutional3D(depth, height, width, convNIn)).build(); + + String json = conf.toJson(); + MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = "Minibatch size = " + miniBatchSize 
+ ", activationFn=" + afn + + ", kernel = " + Arrays.toString(upsamplingSize) + ", mode = " + mode.toString() + + ", input depth " + depth + ", input height " + height + + ", input width " + width; + + if (PRINT_RESULTS) { + log.info(msg); + for (int j = 0; j < net.getnLayers(); j++) { + log.info("Layer " + j + " # params: " + net.getLayer(j).numParams()); + } + } + + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, input, labels); + + assertTrue(msg, gradOK); + + TestUtils.testModelSerialization(net); + + } + } + } + } + @Test + public void testCnn3DCropping() { + Nd4j.getRandom().setSeed(42); + + int depth = 6; + int height = 6; + int width = 6; + + + int[] minibatchSizes = {3}; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int denseNOut = 5; + int finalNOut = 8; + + + int[] kernel = {1, 1, 1}; + int[] cropping = {0, 0, 1, 1, 2, 2}; + + Activation[] activations = {Activation.SIGMOID}; + + ConvolutionMode[] modes = {ConvolutionMode.Same}; + + for (Activation afn : activations) { + for (int miniBatchSize : minibatchSizes) { + for (ConvolutionMode mode : modes) { + + int outDepth = mode == ConvolutionMode.Same ? + depth : (depth - kernel[0]) + 1; + int outHeight = mode == ConvolutionMode.Same ? + height : (height - kernel[1]) + 1; + int outWidth = mode == ConvolutionMode.Same ? + width : (width - kernel[2]) + 1; + + outDepth -= cropping[0] + cropping[1]; + outHeight -= cropping[2] + cropping[3]; + outWidth -= cropping[4] + cropping[5]; + + INDArray input = Nd4j.rand(new int[]{miniBatchSize, convNIn, depth, height, width}); + INDArray labels = Nd4j.zeros(miniBatchSize, finalNOut); + for (int i = 0; i < miniBatchSize; i++) { + labels.putScalar(new int[]{i, i % finalNOut}, 1.0); + } + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()).weightInit(WeightInit.LECUN_NORMAL) + .dist(new NormalDistribution(0, 1)) + .list() + .layer(0, new Convolution3D.Builder().activation(afn).kernelSize(kernel) + .nIn(convNIn).nOut(convNOut1).hasBias(false) + .convolutionMode(mode).dataFormat(Convolution3D.DataFormat.NCDHW) + .build()) + .layer(1, new Convolution3D.Builder().activation(afn).kernelSize(1, 1, 1) + .nIn(convNOut1).nOut(convNOut2).hasBias(false) + .convolutionMode(mode).dataFormat(Convolution3D.DataFormat.NCDHW) + .build()) + .layer(2, new Cropping3D.Builder(cropping).build()) + .layer(3, new DenseLayer.Builder().nOut(denseNOut).build()) + .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nOut(finalNOut).build()) + .inputPreProcessor(3, + new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, + convNOut2, true)) + .setInputType(InputType.convolutional3D(depth, height, width, convNIn)).build(); + + String json = conf.toJson(); + MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = "Minibatch size = " + miniBatchSize + ", activationFn=" + afn + + ", kernel = " + Arrays.toString(kernel) + ", mode = " + mode.toString() + + ", input depth " + depth + ", input height " + height + + ", input width " + width; + + if (PRINT_RESULTS) { + log.info(msg); + for (int j = 0; j < net.getnLayers(); j++) { + log.info("Layer " + j + " # params: " + net.getLayer(j).numParams()); + } + } + + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, + 
DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS,
+                            RETURN_ON_FIRST_FAILURE, input, labels);
+
+                    assertTrue(msg, gradOK);
+
+                    TestUtils.testModelSerialization(net);
+                }
+
+            }
+        }
+    }
 }
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java
index 84cea6b29937..5aef38ea59db 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java
@@ -103,6 +103,7 @@ public class KerasLayerConfiguration {
     private final String LAYER_CLASS_NAME_LEAKY_RELU = "LeakyReLU";
     private final String LAYER_CLASS_NAME_UPSAMPLING_1D = "UpSampling1D";
     private final String LAYER_CLASS_NAME_UPSAMPLING_2D = "UpSampling2D";
+    private final String LAYER_CLASS_NAME_UPSAMPLING_3D = "UpSampling3D";
     private final String LAYER_CLASS_NAME_SEPARABLE_CONVOLUTION_2D = ""; // 1: SeparableConvolution2D, 2: SeparableConv2D
     private final String LAYER_CLASS_NAME_DECONVOLUTION_2D = ""; // 1: Deconvolution2D, 2: Conv2DTranspose
@@ -201,6 +202,7 @@ public class KerasLayerConfiguration {
     private final String LAYER_FIELD_POOL_1D_STRIDES = ""; // 1: stride, 2: strides
     private final String LAYER_FIELD_UPSAMPLING_1D_SIZE = ""; // 1: length, 2: size
     private final String LAYER_FIELD_UPSAMPLING_2D_SIZE = "size";
+    private final String LAYER_FIELD_UPSAMPLING_3D_SIZE = "size";
 
     /* Keras convolution border modes. */
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolutionUtils.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolutionUtils.java
index 5fe040ff434c..9e0cdd36e8a3 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolutionUtils.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolutionUtils.java
@@ -310,10 +310,10 @@ static int[] getPaddingFromConfig(Map layerConfig,
             throw new InvalidKerasConfigurationException(
                     "Field " + layerField + " not found in Keras cropping or padding layer");
         int[] padding;
-        if (dimension == 2) {
+        if (dimension >= 2) {
             List paddingList;
-            // For 2D layers, padding/cropping can either be a pair [[x_0, x_1].[y_0, y_1]] or a pair [x, y]
-            // or a single integer x. yeah, really.
+            // For 2D layers, padding/cropping can either be a pair [[x_0, x_1], [y_0, y_1]], a pair [x, y],
+            // or a single integer x. Likewise for the 3D case.
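+            // For example, with dimension == 3 a nested config like [[1, 1], [2, 2], [3, 3]] is flattened
+            // to {1, 1, 2, 2, 3, 3}, a flat [1, 2, 3] keeps one value per dimension, and a single
+            // integer n is applied to every side of every dimension.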
            try {
                List paddingNoCast = (List) innerConfig.get(layerField);
                boolean isNested;
@@ -321,34 +321,36 @@ static int[] getPaddingFromConfig(Map layerConfig,
                    @SuppressWarnings("unchecked")
                    List firstItem = (List) paddingNoCast.get(0);
                    isNested = true;
-                    paddingList = new ArrayList<>(4);
+                    paddingList = new ArrayList<>(2 * dimension);
                } catch (Exception e) {
                    int firstItem = (int) paddingNoCast.get(0);
                    isNested = false;
-                    paddingList = new ArrayList<>(2);
+                    paddingList = new ArrayList<>(dimension);
                }
-                if ((paddingNoCast.size() == 2) && !isNested) {
-                    paddingList.add((int) paddingNoCast.get(0));
-                    paddingList.add((int) paddingNoCast.get(1));
+                if ((paddingNoCast.size() == dimension) && !isNested) {
+                    for (int i = 0; i < dimension; i++)
+                        paddingList.add((int) paddingNoCast.get(i));
                     padding = ArrayUtil.toArray(paddingList);
-                } else if ((paddingNoCast.size() == 2) && isNested) {
-                    @SuppressWarnings("unchecked")
-                    List first = (List) paddingNoCast.get(0);
-                    paddingList.add((first.get(0)));
-                    paddingList.add((first.get(1)));
-                    @SuppressWarnings("unchecked")
-                    List second = (List) paddingNoCast.get(1);
-                    paddingList.add((second.get(0)));
-                    paddingList.add((second.get(1)));
+                } else if ((paddingNoCast.size() == dimension) && isNested) {
+                    for (int j = 0; j < dimension; j++) {
+                        @SuppressWarnings("unchecked")
+                        List item = (List) paddingNoCast.get(j);
+                        paddingList.add((item.get(0)));
+                        paddingList.add((item.get(1)));
+                    }
                     padding = ArrayUtil.toArray(paddingList);
                 } else {
-                    throw new InvalidKerasConfigurationException("Found Keras ZeroPadding2D layer with invalid "
-                            + paddingList.size() + "D padding.");
+                    throw new InvalidKerasConfigurationException("Found Keras ZeroPadding" + dimension
+                            + "D layer with invalid " + paddingList.size() + "D padding.");
                 }
             } catch (Exception e) {
                 int paddingInt = (int) innerConfig.get(layerField);
-                padding = new int[]{paddingInt, paddingInt};
+                if (dimension == 2) {
+                    padding = new int[]{paddingInt, paddingInt, paddingInt, paddingInt};
+                } else {
+                    padding = new int[]{paddingInt, paddingInt, paddingInt, paddingInt, paddingInt, paddingInt};
+                }
             }
         } else if (dimension == 1) {
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping3D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping3D.java
new file mode 100644
index 000000000000..5ffc525da208
--- /dev/null
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping3D.java
@@ -0,0 +1,80 @@
+package org.deeplearning4j.nn.modelimport.keras.layers.convolutional;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.extern.slf4j.Slf4j;
+import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.convolutional.Cropping3D;
+import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
+
+import java.util.Map;
+
+import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getPaddingFromConfig;
+
+/**
+ * Imports a Keras Cropping 3D layer.
+ *
+ * @author Max Pumperla
+ */
+@Slf4j
+@Data
+@EqualsAndHashCode(callSuper = false)
+public class KerasCropping3D extends KerasLayer {
+
+    /**
+     * Constructor from parsed Keras layer configuration dictionary.
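+     * Equivalent to calling the two-argument constructor with enforceTrainingConfig = true.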
+ * + * @param layerConfig dictionary containing Keras layer configuration. + * @throws InvalidKerasConfigurationException Invalid Keras config + * @throws UnsupportedKerasConfigurationException Unsupported Keras config + */ + public KerasCropping3D(Map layerConfig) + throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { + this(layerConfig, true); + } + + /** + * Constructor from parsed Keras layer configuration dictionary. + * + * @param layerConfig dictionary containing Keras layer configuration + * @param enforceTrainingConfig whether to enforce training-related configuration options + * @throws InvalidKerasConfigurationException Invalid Keras config + * @throws UnsupportedKerasConfigurationException Unsupported Keras config + */ + public KerasCropping3D(Map layerConfig, boolean enforceTrainingConfig) + throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { + super(layerConfig, enforceTrainingConfig); + String croppingField = conf.getLAYER_FIELD_CROPPING(); + int[] cropping = getPaddingFromConfig(layerConfig, conf, croppingField, 3); + Cropping3D.Builder builder = new Cropping3D.Builder(cropping) + .name(this.layerName).dropOut(this.dropout); + this.layer = builder.build(); + this.vertex = null; + } + + /** + * Get DL4J Cropping3D layer. + * + * @return Cropping3D layer + */ + public Cropping3D getCropping3DLayer() { + return (Cropping3D) this.layer; + } + + /** + * Get layer output type. + * + * @param inputType Array of InputTypes + * @return output type as InputType + * @throws InvalidKerasConfigurationException Invalid Keras config + */ + @Override + public InputType getOutputType(InputType... inputType) throws InvalidKerasConfigurationException { + if (inputType.length > 1) + throw new InvalidKerasConfigurationException( + "Keras Cropping 3D layer accepts only one input (received " + inputType.length + ")"); + return this.getCropping3DLayer().getOutputType(-1, inputType[0]); + } +} diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java new file mode 100644 index 000000000000..0ca4a6467003 --- /dev/null +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java @@ -0,0 +1,97 @@ +/*- + * + * * Copyright 2017 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. 
+ *
+ */
+package org.deeplearning4j.nn.modelimport.keras.layers.convolutional;
+
+import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.Upsampling3D;
+import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
+
+import java.util.Map;
+
+
+/**
+ * Keras Upsampling3D layer support
+ *
+ * @author Max Pumperla
+ */
+public class KerasUpsampling3D extends KerasLayer {
+
+    /**
+     * Constructor from parsed Keras layer configuration dictionary.
+     *
+     * @param layerConfig dictionary containing Keras layer configuration.
+     * @throws InvalidKerasConfigurationException Invalid Keras configuration exception
+     * @throws UnsupportedKerasConfigurationException Unsupported Keras configuration exception
+     */
+    public KerasUpsampling3D(Map layerConfig)
+            throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException {
+        this(layerConfig, true);
+    }
+
+    /**
+     * Constructor from parsed Keras layer configuration dictionary.
+     *
+     * @param layerConfig dictionary containing Keras layer configuration
+     * @param enforceTrainingConfig whether to enforce training-related configuration options
+     * @throws InvalidKerasConfigurationException Invalid Keras configuration exception
+     * @throws UnsupportedKerasConfigurationException Unsupported Keras configuration exception
+     */
+    public KerasUpsampling3D(Map layerConfig, boolean enforceTrainingConfig)
+            throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException {
+        super(layerConfig, enforceTrainingConfig);
+
+        int[] size = KerasConvolutionUtils.getUpsamplingSizeFromConfig(layerConfig, 3, conf);
+        // TODO: make sure to allow different sizes.
+
+        Upsampling3D.Builder builder = new Upsampling3D.Builder()
+                .name(this.layerName)
+                .dropOut(this.dropout)
+                .size(size[0]);
+
+        this.layer = builder.build();
+        this.vertex = null;
+    }
+
+    /**
+     * Get DL4J Upsampling3D layer.
+     *
+     * @return Upsampling3D layer
+     */
+    public Upsampling3D getUpsampling3DLayer() {
+        return (Upsampling3D) this.layer;
+    }
+
+    /**
+     * Get layer output type.
+     *
+     * @param inputType Array of InputTypes
+     * @return output type as InputType
+     * @throws InvalidKerasConfigurationException Invalid Keras config
+     */
+    @Override
+    public InputType getOutputType(InputType... inputType) throws InvalidKerasConfigurationException {
+        if (inputType.length > 1)
+            throw new InvalidKerasConfigurationException(
+                    "Keras Upsampling 3D layer accepts only one input (received " + inputType.length + ")");
+        return this.getUpsampling3DLayer().getOutputType(-1, inputType[0]);
+    }
+
+}
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java
index 2121fd66990c..e6626b339f84 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java
@@ -56,9 +56,9 @@ public KerasZeroPadding1D(Map layerConfig, boolean enforceTraini
     }
 
     /**
-     * Get DL4J SubsamplingLayer.
+     * Get DL4J ZeroPadding1DLayer.
* - * @return SubsamplingLayer + * @return ZeroPadding1DLayer */ public ZeroPadding1DLayer getZeroPadding1DLayer() { return (ZeroPadding1DLayer) this.layer; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java index 76f289fece35..12a75547cea1 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java @@ -56,9 +56,9 @@ public KerasZeroPadding2D(Map layerConfig, boolean enforceTraini } /** - * Get DL4J SubsamplingLayer. + * Get DL4J ZeroPadding2DLayer. * - * @return SubsamplingLayer + * @return ZeroPadding2DLayer */ public ZeroPaddingLayer getZeroPadding2DLayer() { return (ZeroPaddingLayer) this.layer; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding3D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding3D.java new file mode 100644 index 000000000000..5b20935c4d0d --- /dev/null +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding3D.java @@ -0,0 +1,82 @@ +package org.deeplearning4j.nn.modelimport.keras.layers.convolutional; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.ZeroPadding3DLayer; +import org.deeplearning4j.nn.conf.layers.ZeroPaddingLayer; +import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; +import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; + +import java.util.Map; + +import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getPaddingFromConfig; + +/** + * Imports a Keras ZeroPadding 3D layer. + * + * @author Max Pumperla + */ +@Slf4j +@Data +@EqualsAndHashCode(callSuper = false) +public class KerasZeroPadding3D extends KerasLayer { + + /** + * Constructor from parsed Keras layer configuration dictionary. + * + * @param layerConfig dictionary containing Keras layer configuration. + * + * @throws InvalidKerasConfigurationException Invalid Keras config + * @throws UnsupportedKerasConfigurationException Unsupported Keras config + */ + public KerasZeroPadding3D(Map layerConfig) + throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { + this(layerConfig, true); + } + + /** + * Constructor from parsed Keras layer configuration dictionary. 
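+     * Reads the Keras zero-padding field and builds the equivalent DL4J ZeroPadding3DLayer.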
+     *
+     * @param layerConfig dictionary containing Keras layer configuration
+     * @param enforceTrainingConfig whether to enforce training-related configuration options
+     * @throws InvalidKerasConfigurationException Invalid Keras config
+     * @throws UnsupportedKerasConfigurationException Unsupported Keras config
+     */
+    public KerasZeroPadding3D(Map layerConfig, boolean enforceTrainingConfig)
+            throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException {
+        super(layerConfig, enforceTrainingConfig);
+        String paddingField = conf.getLAYER_FIELD_ZERO_PADDING();
+        int[] padding = getPaddingFromConfig(layerConfig, conf, paddingField, 3);
+        ZeroPadding3DLayer.Builder builder = new ZeroPadding3DLayer.Builder(padding)
+                .name(this.layerName).dropOut(this.dropout);
+        this.layer = builder.build();
+        this.vertex = null;
+    }
+
+    /**
+     * Get DL4J ZeroPadding3DLayer.
+     *
+     * @return ZeroPadding3DLayer
+     */
+    public ZeroPadding3DLayer getZeroPadding3DLayer() {
+        return (ZeroPadding3DLayer) this.layer;
+    }
+
+    /**
+     * Get layer output type.
+     *
+     * @param inputType Array of InputTypes
+     * @return output type as InputType
+     * @throws InvalidKerasConfigurationException Invalid Keras config
+     */
+    @Override
+    public InputType getOutputType(InputType... inputType) throws InvalidKerasConfigurationException {
+        if (inputType.length > 1)
+            throw new InvalidKerasConfigurationException(
+                    "Keras ZeroPadding3D layer accepts only one input (received " + inputType.length + ")");
+        return this.getZeroPadding3DLayer().getOutputType(-1, inputType[0]);
+    }
+}
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3D.java
new file mode 100644
index 000000000000..49542fdf7ff4
--- /dev/null
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3D.java
@@ -0,0 +1,98 @@
+/*-
+ *
+ * * Copyright 2017 Skymind,Inc.
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+package org.deeplearning4j.nn.modelimport.keras.layers.pooling;
+
+import lombok.extern.slf4j.Slf4j;
+import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.Subsampling3DLayer;
+import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
+
+import java.util.Map;
+
+import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.*;
+
+/**
+ * Imports a Keras 3D Pooling layer as a DL4J Subsampling3D layer.
+ *
+ * @author Max Pumperla
+ */
+@Slf4j
+public class KerasPooling3D extends KerasLayer {
+
+    /**
+     * Constructor from parsed Keras layer configuration dictionary.
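+     * Delegates to the two-argument constructor with enforceTrainingConfig set to true.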
+ * + * @param layerConfig dictionary containing Keras layer configuration. + * @throws InvalidKerasConfigurationException Invalid Keras config + * @throws UnsupportedKerasConfigurationException Unsupported Keras config + */ + public KerasPooling3D(Map layerConfig) + throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { + this(layerConfig, true); + } + + /** + * Constructor from parsed Keras layer configuration dictionary. + * + * @param layerConfig dictionary containing Keras layer configuration + * @param enforceTrainingConfig whether to enforce training-related configuration options + * @throws InvalidKerasConfigurationException Invalid Keras config + * @throws UnsupportedKerasConfigurationException Unsupported Keras config + */ + public KerasPooling3D(Map layerConfig, boolean enforceTrainingConfig) + throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { + super(layerConfig, enforceTrainingConfig); + Subsampling3DLayer.Builder builder = new Subsampling3DLayer.Builder( + KerasPoolingUtils.mapPoolingType(this.className, conf)).name(this.layerName) + .dropOut(this.dropout) + .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) + .kernelSize(getKernelSizeFromConfig(layerConfig, 3, conf, kerasMajorVersion)) + .stride(getStrideFromConfig(layerConfig, 3, conf)); + int[] padding = getPaddingFromBorderModeConfig(layerConfig, 3, conf, kerasMajorVersion); + if (padding != null) + builder.padding(padding); + this.layer = builder.build(); + this.vertex = null; + } + + /** + * Get DL4J Subsampling3DLayer. + * + * @return Subsampling3DLayer + */ + public Subsampling3DLayer getSubsampling3DLayer() { + return (Subsampling3DLayer) this.layer; + } + + /** + * Get layer output type. + * + * @param inputType Array of InputTypes + * @return output type as InputType + * @throws InvalidKerasConfigurationException Invalid Keras config + */ + @Override + public InputType getOutputType(InputType... 
inputType) throws InvalidKerasConfigurationException { + if (inputType.length > 1) + throw new InvalidKerasConfigurationException( + "Keras Subsampling/Pooling 3D layer accepts only one input (received " + inputType.length + ")"); + return this.getSubsampling3DLayer().getOutputType(-1, inputType[0]); + } +} diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPoolingUtils.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPoolingUtils.java index f13854fb9038..6f4aed4aacb6 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPoolingUtils.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPoolingUtils.java @@ -40,11 +40,13 @@ public static PoolingType mapPoolingType(String className, KerasLayerConfigurati PoolingType poolingType; if (className.equals(conf.getLAYER_CLASS_NAME_MAX_POOLING_2D()) || className.equals(conf.getLAYER_CLASS_NAME_MAX_POOLING_1D()) || + className.equals(conf.getLAYER_CLASS_NAME_MAX_POOLING_3D()) || className.equals(conf.getLAYER_CLASS_NAME_GLOBAL_MAX_POOLING_1D()) || className.equals(conf.getLAYER_CLASS_NAME_GLOBAL_MAX_POOLING_2D())) { poolingType = PoolingType.MAX; } else if (className.equals(conf.getLAYER_CLASS_NAME_AVERAGE_POOLING_2D()) || className.equals(conf.getLAYER_CLASS_NAME_AVERAGE_POOLING_1D()) || + className.equals(conf.getLAYER_CLASS_NAME_AVERAGE_POOLING_3D()) || className.equals(conf.getLAYER_CLASS_NAME_GLOBAL_AVERAGE_POOLING_1D()) || className.equals(conf.getLAYER_CLASS_NAME_GLOBAL_AVERAGE_POOLING_2D())) { poolingType = PoolingType.AVG; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java index 7332267c1902..dfe312afb038 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java @@ -36,6 +36,7 @@ import org.deeplearning4j.nn.modelimport.keras.layers.pooling.KerasGlobalPooling; import org.deeplearning4j.nn.modelimport.keras.layers.pooling.KerasPooling1D; import org.deeplearning4j.nn.modelimport.keras.layers.pooling.KerasPooling2D; +import org.deeplearning4j.nn.modelimport.keras.layers.pooling.KerasPooling3D; import org.deeplearning4j.nn.modelimport.keras.layers.recurrent.KerasLstm; import org.deeplearning4j.nn.modelimport.keras.layers.recurrent.KerasSimpleRnn; import org.deeplearning4j.nn.modelimport.keras.layers.wrappers.KerasBidirectional; @@ -219,6 +220,9 @@ public static KerasLayer getKerasLayerFromConfig(Map layerConfig layer = new KerasAtrousConvolution1D(layerConfig, enforceTrainingConfig); } else if (layerClassName.equals(conf.getLAYER_CLASS_NAME_SEPARABLE_CONVOLUTION_2D())) { layer = new KerasSeparableConvolution2D(layerConfig, enforceTrainingConfig); + } else if (layerClassName.equals(conf.getLAYER_CLASS_NAME_MAX_POOLING_3D()) || + layerClassName.equals(conf.getLAYER_CLASS_NAME_AVERAGE_POOLING_3D())) { + layer = new KerasPooling3D(layerConfig, enforceTrainingConfig); } else if (layerClassName.equals(conf.getLAYER_CLASS_NAME_MAX_POOLING_2D()) || layerClassName.equals(conf.getLAYER_CLASS_NAME_AVERAGE_POOLING_2D())) { layer = new KerasPooling2D(layerConfig, 
enforceTrainingConfig); @@ -263,10 +267,14 @@ public static KerasLayer getKerasLayerFromConfig(Map layerConfig layer = new KerasZeroPadding1D(layerConfig, enforceTrainingConfig); } else if (layerClassName.equals(conf.getLAYER_CLASS_NAME_ZERO_PADDING_2D())) { layer = new KerasZeroPadding2D(layerConfig, enforceTrainingConfig); + } else if (layerClassName.equals(conf.getLAYER_CLASS_NAME_ZERO_PADDING_3D())) { + layer = new KerasZeroPadding3D(layerConfig, enforceTrainingConfig); } else if (layerClassName.equals(conf.getLAYER_CLASS_NAME_UPSAMPLING_1D())) { layer = new KerasUpsampling1D(layerConfig, enforceTrainingConfig); } else if (layerClassName.equals(conf.getLAYER_CLASS_NAME_UPSAMPLING_2D())) { layer = new KerasUpsampling2D(layerConfig, enforceTrainingConfig); + } else if (layerClassName.equals(conf.getLAYER_CLASS_NAME_CROPPING_3D())) { + layer = new KerasCropping3D(layerConfig, enforceTrainingConfig); } else if (layerClassName.equals(conf.getLAYER_CLASS_NAME_CROPPING_2D())) { layer = new KerasCropping2D(layerConfig, enforceTrainingConfig); } else if (layerClassName.equals(conf.getLAYER_CLASS_NAME_CROPPING_1D())) { diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping1DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping1DTest.java index 8cda45389831..5b4453bf3e71 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping1DTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping1DTest.java @@ -54,7 +54,7 @@ public void testCropping1DLayer() throws Exception { private void buildCroppingSingleDim1DLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws Exception { Map layerConfig = new HashMap<>(); - layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_MAX_POOLING_1D()); + layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_CROPPING_1D()); Map config = new HashMap<>(); config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME); config.put(conf.getLAYER_FIELD_CROPPING(), CROPPING); diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping2DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping2DTest.java index f89a582032fc..8f1097f67948 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping2DTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping2DTest.java @@ -55,7 +55,7 @@ public void testCropping2DLayer() throws Exception { private void buildCropping2DLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws Exception { Map layerConfig = new HashMap<>(); - layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_MAX_POOLING_1D()); + layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_CROPPING_2D()); Map config = new HashMap<>(); config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME); ArrayList padding = new ArrayList() {{ @@ -77,7 +77,7 @@ private void buildCropping2DLayer(KerasLayerConfiguration conf, Integer kerasVer private void buildCroppingSingleDim2DLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws Exception { Map 
layerConfig = new HashMap<>(); - layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_MAX_POOLING_1D()); + layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_CROPPING_2D()); Map config = new HashMap<>(); config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME); config.put(conf.getLAYER_FIELD_CROPPING(), CROPPING[0]); diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping3DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping3DTest.java new file mode 100644 index 000000000000..f1dae70d2fee --- /dev/null +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping3DTest.java @@ -0,0 +1,96 @@ +/*- + * + * * Copyright 2017 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ +package org.deeplearning4j.nn.modelimport.keras.layers.convolution; + +import org.deeplearning4j.nn.conf.layers.convolutional.Cropping2D; +import org.deeplearning4j.nn.conf.layers.convolutional.Cropping3D; +import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; +import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; +import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; +import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasCropping2D; +import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasCropping3D; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +/** + * @author Max Pumperla + */ +public class KerasCropping3DTest { + + private final String LAYER_NAME = "cropping_3D_layer"; + private final int[] CROPPING = new int[]{2, 3, 5}; + + private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration(); + private Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration(); + + @Test + public void testCropping3DLayer() throws Exception { + Integer keras1 = 1; + buildCropping3DLayer(conf1, keras1); + Integer keras2 = 2; + buildCropping3DLayer(conf2, keras2); + buildCroppingSingleDim3DLayer(conf1, keras1); + buildCroppingSingleDim3DLayer(conf2, keras2); + } + + + private void buildCropping3DLayer(KerasLayerConfiguration conf, Integer kerasVersion) + throws Exception { + Map layerConfig = new HashMap<>(); + layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_CROPPING_3D()); + Map config = new HashMap<>(); + config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME); + ArrayList padding = new ArrayList() {{ + for (int i : CROPPING) add(i); + }}; + config.put(conf.getLAYER_FIELD_CROPPING(), padding); + layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config); + layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); + + Cropping3D layer = new KerasCropping3D(layerConfig).getCropping3DLayer(); + 
assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(CROPPING[0], layer.getCropping()[0]); + assertEquals(CROPPING[0], layer.getCropping()[1]); + assertEquals(CROPPING[1], layer.getCropping()[2]); + assertEquals(CROPPING[1], layer.getCropping()[3]); + assertEquals(CROPPING[2], layer.getCropping()[4]); + assertEquals(CROPPING[2], layer.getCropping()[5]); + + } + + private void buildCroppingSingleDim3DLayer(KerasLayerConfiguration conf, Integer kerasVersion) + throws Exception { + Map layerConfig = new HashMap<>(); + layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_CROPPING_3D()); + Map config = new HashMap<>(); + config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME); + config.put(conf.getLAYER_FIELD_CROPPING(), CROPPING[0]); + layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config); + layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); + + Cropping3D layer = new KerasCropping3D(layerConfig).getCropping3DLayer(); + assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(CROPPING[0], layer.getCropping()[0]); + assertEquals(CROPPING[0], layer.getCropping()[1]); + } +} diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java index 1b61d3d4d8a9..933c067be948 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java @@ -63,7 +63,7 @@ private void buildUpsampling1DLayer(KerasLayerConfiguration conf, Integer kerasV Upsampling1D layer = new KerasUpsampling1D(layerConfig).getUpsampling1DLayer(); assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(size, layer.getSize()); + assertEquals(size, layer.getSize()[0]); } } diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling3DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling3DTest.java new file mode 100644 index 000000000000..b7c81a1270e5 --- /dev/null +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling3DTest.java @@ -0,0 +1,72 @@ +/*- + * + * * Copyright 2017 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. 
+ *
+ */
+package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
+
+import org.deeplearning4j.nn.conf.layers.Upsampling3D;
+import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
+import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
+import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
+import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasUpsampling3D;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author Max Pumperla
+ */
+public class KerasUpsampling3DTest {
+
+    private final String LAYER_NAME = "upsampling_3D_layer";
+    private int[] size = new int[]{2, 2, 2};
+
+    private Integer keras1 = 1;
+    private Integer keras2 = 2;
+    private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration();
+    private Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration();
+
+    @Test
+    public void testUpsampling3DLayer() throws Exception {
+        buildUpsampling3DLayer(conf1, keras1);
+        buildUpsampling3DLayer(conf2, keras2);
+    }
+
+
+    private void buildUpsampling3DLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws Exception {
+        Map layerConfig = new HashMap<>();
+        layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_UPSAMPLING_3D());
+        Map config = new HashMap<>();
+        List sizeList = new ArrayList<>();
+        sizeList.add(size[0]);
+        sizeList.add(size[1]);
+        sizeList.add(size[2]);
+        config.put(conf.getLAYER_FIELD_UPSAMPLING_3D_SIZE(), sizeList);
+        config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME);
+        layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config);
+        layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion);
+
+        Upsampling3D layer = new KerasUpsampling3D(layerConfig).getUpsampling3DLayer();
+        assertEquals(LAYER_NAME, layer.getLayerName());
+        assertEquals(size[0], layer.getSize()[0]);
+    }
+
+}
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding1DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding1DTest.java
index ca57de5add51..b67ac1fc76d1 100644
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding1DTest.java
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding1DTest.java
@@ -48,7 +48,7 @@ public void testZeroPadding1DLayer() throws Exception {
     private void buildZeroPadding1DLayer(KerasLayerConfiguration conf, Integer kerasVersion)
             throws Exception {
         Map layerConfig = new HashMap<>();
-        layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_MAX_POOLING_1D());
+        layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_ZERO_PADDING_1D());
         Map config = new HashMap<>();
         String layerName = "zero_padding_1D_layer";
         config.put(conf.getLAYER_FIELD_NAME(), layerName);
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding2DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding2DTest.java
index 422d70950b17..50822dc518fa 100644
---
a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding2DTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding2DTest.java @@ -55,7 +55,7 @@ public void testZeroPadding2DLayer() throws Exception { private void buildZeroPadding2DLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws Exception { Map layerConfig = new HashMap<>(); - layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_MAX_POOLING_1D()); + layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_ZERO_PADDING_2D()); Map config = new HashMap<>(); config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME); ArrayList padding = new ArrayList() {{ @@ -77,7 +77,7 @@ private void buildZeroPadding2DLayer(KerasLayerConfiguration conf, Integer keras private void buildZeroPaddingSingleDim2DLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws Exception { Map layerConfig = new HashMap<>(); - layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_MAX_POOLING_1D()); + layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_ZERO_PADDING_2D()); Map config = new HashMap<>(); config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME); config.put(conf.getLAYER_FIELD_ZERO_PADDING(), ZERO_PADDING[0]); diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding3DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding3DTest.java new file mode 100644 index 000000000000..4be92668d033 --- /dev/null +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding3DTest.java @@ -0,0 +1,96 @@ +/*- + * + * * Copyright 2017 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. 
+ * + */ +package org.deeplearning4j.nn.modelimport.keras.layers.convolution; + +import org.deeplearning4j.nn.conf.layers.ZeroPadding3DLayer; +import org.deeplearning4j.nn.conf.layers.ZeroPaddingLayer; +import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; +import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; +import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; +import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasZeroPadding2D; +import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasZeroPadding3D; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +/** + * @author Max Pumperla + */ +public class KerasZeroPadding3DTest { + + private final String LAYER_NAME = "zero_padding_3D_layer"; + private final int[] ZERO_PADDING = new int[]{2, 3, 4}; + + private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration(); + private Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration(); + + @Test + public void testZeroPadding3DLayer() throws Exception { + Integer keras1 = 1; + buildZeroPadding3DLayer(conf1, keras1); + Integer keras2 = 2; + buildZeroPadding3DLayer(conf2, keras2); + buildZeroPaddingSingleDim3DLayer(conf1, keras1); + buildZeroPaddingSingleDim3DLayer(conf2, keras2); + } + + + private void buildZeroPadding3DLayer(KerasLayerConfiguration conf, Integer kerasVersion) + throws Exception { + Map layerConfig = new HashMap<>(); + layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_ZERO_PADDING_3D()); + Map config = new HashMap<>(); + config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME); + ArrayList padding = new ArrayList() {{ + for (int i : ZERO_PADDING) add(i); + }}; + config.put(conf.getLAYER_FIELD_ZERO_PADDING(), padding); + layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config); + layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); + + ZeroPadding3DLayer layer = new KerasZeroPadding3D(layerConfig).getZeroPadding3DLayer(); + assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(ZERO_PADDING[0], layer.getPadding()[0]); + assertEquals(ZERO_PADDING[0], layer.getPadding()[1]); + assertEquals(ZERO_PADDING[1], layer.getPadding()[2]); + assertEquals(ZERO_PADDING[1], layer.getPadding()[3]); + assertEquals(ZERO_PADDING[2], layer.getPadding()[4]); + assertEquals(ZERO_PADDING[2], layer.getPadding()[5]); + + } + + private void buildZeroPaddingSingleDim3DLayer(KerasLayerConfiguration conf, Integer kerasVersion) + throws Exception { + Map layerConfig = new HashMap<>(); + layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_ZERO_PADDING_3D()); + Map config = new HashMap<>(); + config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME); + config.put(conf.getLAYER_FIELD_ZERO_PADDING(), ZERO_PADDING[0]); + layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config); + layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); + + ZeroPadding3DLayer layer = new KerasZeroPadding3D(layerConfig).getZeroPadding3DLayer(); + assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(ZERO_PADDING[0], layer.getPadding()[0]); + assertEquals(ZERO_PADDING[0], layer.getPadding()[1]); + } +} diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3DTest.java 
b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3DTest.java new file mode 100644 index 000000000000..ea56651dae7c --- /dev/null +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3DTest.java @@ -0,0 +1,87 @@ +/*- + * + * * Copyright 2017 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ +package org.deeplearning4j.nn.modelimport.keras.layers.pooling; + +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.layers.PoolingType; +import org.deeplearning4j.nn.conf.layers.Subsampling3DLayer; +import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; +import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; +import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; +import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +/** + * @author Max Pumperla + */ +public class KerasPooling3DTest { + + private final String LAYER_NAME = "pooling_3d"; + private final int[] KERNEL_SIZE = new int[]{2, 2, 2}; + private final int[] STRIDE = new int[]{1, 1, 1}; + private final PoolingType POOLING_TYPE = PoolingType.MAX; + private final String BORDER_MODE_VALID = "valid"; + private final int[] VALID_PADDING = new int[]{0, 0, 0}; + + private Integer keras1 = 1; + private Integer keras2 = 2; + private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration(); + private Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration(); + + @Test + public void testPooling3DLayer() throws Exception { + buildPooling3DLayer(conf1, keras1); + buildPooling3DLayer(conf2, keras2); + } + + private void buildPooling3DLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws Exception { + Map layerConfig = new HashMap<>(); + layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_MAX_POOLING_3D()); + Map config = new HashMap<>(); + config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME); + List kernelSizeList = new ArrayList<>(); + kernelSizeList.add(KERNEL_SIZE[0]); + kernelSizeList.add(KERNEL_SIZE[1]); + kernelSizeList.add(KERNEL_SIZE[2]); + config.put(conf.getLAYER_FIELD_POOL_SIZE(), kernelSizeList); + List subsampleList = new ArrayList<>(); + subsampleList.add(STRIDE[0]); + subsampleList.add(STRIDE[1]); + subsampleList.add(STRIDE[2]); + config.put(conf.getLAYER_FIELD_POOL_STRIDES(), subsampleList); + config.put(conf.getLAYER_FIELD_BORDER_MODE(), BORDER_MODE_VALID); + layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config); + layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); + + Subsampling3DLayer layer = new KerasPooling3D(layerConfig).getSubsampling3DLayer(); + 
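// Shape sanity notes for the assertions below (values from this test's config): Keras "valid"
+ // border mode maps to ConvolutionMode.Truncate with zero padding, so e.g. a 4x4x4 volume pooled
+ // with size {2, 2, 2} and stride {1, 1, 1} would give (in - pool) / stride + 1 = 3x3x3 per channel.
+ 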
assertEquals(LAYER_NAME, layer.getLayerName()); + assertArrayEquals(KERNEL_SIZE, layer.getKernelSize()); + assertArrayEquals(STRIDE, layer.getStride()); + assertEquals(POOLING_TYPE, layer.getPoolingType()); + assertEquals(ConvolutionMode.Truncate, layer.getConvolutionMode()); + assertArrayEquals(VALID_PADDING, layer.getPadding()); + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java index 65b1c276cf06..577e853c465a 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java @@ -39,7 +39,8 @@ public interface Layer extends Serializable, Cloneable, Model { enum Type { - FEED_FORWARD, RECURRENT, CONVOLUTIONAL, SUBSAMPLING, RECURSIVE, MULTILAYER, NORMALIZATION + FEED_FORWARD, RECURRENT, CONVOLUTIONAL, CONVOLUTIONAL3D, + SUBSAMPLING, UPSAMPLING, RECURSIVE, MULTILAYER, NORMALIZATION } enum TrainingMode { @@ -54,18 +55,22 @@ enum TrainingMode { */ void setCacheMode(CacheMode mode); - /**Calculate the l2 regularization term
+ /** + * Calculate the l2 regularization term
* 0.0 if regularization is not used. Or 0.5 * l2Coeff * l2Magnitude otherwise.
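+ * (Worked example, for illustration only: l2Coeff = 1e-4 and l2Magnitude = 2.0 give 0.5 * 1e-4 * 2.0 = 1e-4.)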
* Note that this does not divide by mini-batch size + * * @param backpropOnlyParams If true: calculate L2 based on backprop params only. If false: calculate * based on all params (including pretrain params, if any) * @return the l2 regularization term for this layer. */ double calcL2(boolean backpropOnlyParams); - /**Calculate the l1 regularization term
+ /** + * Calculate the l1 regularization term
* 0.0 if regularization is not used. Or l1Coeff * l1Magnitude otherwise.
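+ * (Worked example, for illustration only: l1Coeff = 1e-3 and l1Magnitude = 0.5 give 1e-3 * 0.5 = 5e-4.)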
* Note that this does not divide by mini-batch size + * * @param backpropOnlyParams If true: calculate L1 based on backprop params only. If false: calculate * based on all params (including pretrain params, if any) * @return the l1 regularization term for this layer. @@ -74,40 +79,44 @@ enum TrainingMode { /** * Returns the layer type + * * @return */ Type type(); - /**Calculate the gradient relative to the error in the next layer - * @param epsilon w^(L+1)*delta^(L+1). Or, equiv: dC/da, i.e., (dC/dz)*(dz/da) = dC/da, where C - * is cost function a=sigma(z) is activation. + /** + * Calculate the gradient relative to the error in the next layer + * + * @param epsilon w^(L+1)*delta^(L+1). Or, equiv: dC/da, i.e., (dC/dz)*(dz/da) = dC/da, where C + * is cost function a=sigma(z) is activation. * @param workspaceMgr Workspace manager - * @return Pair where Gradient is gradient for this layer, INDArray is epsilon (activation gradient) - * needed by next layer, but before element-wise multiply by sigmaPrime(z). So for standard feed-forward layer, if this layer is - * L, then return.getSecond() == dL/dIn = (w^(L)*(delta^(L))^T)^T. Note that the returned array should be placed in the - * {@link org.deeplearning4j.nn.workspace.ArrayType#ACTIVATION_GRAD} workspace via the workspace manager + * @return Pair where Gradient is gradient for this layer, INDArray is epsilon (activation gradient) + * needed by next layer, but before element-wise multiply by sigmaPrime(z). So for standard feed-forward layer, if this layer is + * L, then return.getSecond() == dL/dIn = (w^(L)*(delta^(L))^T)^T. Note that the returned array should be placed in the + * {@link org.deeplearning4j.nn.workspace.ArrayType#ACTIVATION_GRAD} workspace via the workspace manager */ Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr); /** * Perform forward pass and return the activations array with the last set input - * @param training training or test mode + * + * @param training training or test mode * @param workspaceMgr Workspace manager * @return the activation (layer output) of the last specified input. Note that the returned array should be placed - * in the {@link org.deeplearning4j.nn.workspace.ArrayType#ACTIVATIONS} workspace via the workspace manager + * in the {@link org.deeplearning4j.nn.workspace.ArrayType#ACTIVATIONS} workspace via the workspace manager */ INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr); /** * Perform forward pass and return the activations array with the specified input * - * @param input the input to use - * @param training train or test mode - * @param mgr Workspace manager. + * @param input the input to use + * @param training train or test mode + * @param mgr Workspace manager. * @return Activations array. Note that the returned array should be placed in the - * {@link org.deeplearning4j.nn.workspace.ArrayType#ACTIVATIONS} workspace via the workspace manager + * {@link org.deeplearning4j.nn.workspace.ArrayType#ACTIVATIONS} workspace via the workspace manager */ INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr mgr); @@ -122,13 +131,13 @@ enum TrainingMode { /** * Clone the layer + * * @return */ @Deprecated Layer clone(); - /** * Get the iteration listeners for this layer. */ @@ -179,14 +188,17 @@ enum TrainingMode { */ void setInput(INDArray input, LayerWorkspaceMgr workspaceMgr); - /** Set current/last input mini-batch size.
+ /** + * Set current/last input mini-batch size.
* Used for score and gradient calculations. Mini batch size may be different from * getInput().size(0) due to reshaping operations - for example, when using RNNs with * DenseLayer and OutputLayer. Called automatically during forward pass. */ void setInputMiniBatchSize(int size); - /** Get current/last input mini-batch size, as set by setInputMiniBatchSize(int) + /** + * Get current/last input mini-batch size, as set by setInputMiniBatchSize(int) + * * @see Layer#setInputMiniBatchSize(int) */ int getInputMiniBatchSize(); @@ -194,6 +206,7 @@ enum TrainingMode { /** * Set the mask array. Note: In general, {@link #feedForwardMaskArray(INDArray, MaskState, int)} should be used in * preference to this. + * * @param maskArray Mask array to set */ void setMaskArray(INDArray maskArray); @@ -224,7 +237,7 @@ enum TrainingMode { * @param currentMaskState Current state of the mask - see {@link MaskState} * @param minibatchSize Current minibatch size. Needs to be known as it cannot always be inferred from the activations * array due to reshaping (such as a DenseLayer within a recurrent neural network) - * @return New mask array after this layer, along with the new mask state. + * @return New mask array after this layer, along with the new mask state. */ Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize); } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/inputs/InputType.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/inputs/InputType.java index c721f0ee306c..9afee47e9279 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/inputs/InputType.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/inputs/InputType.java @@ -121,8 +121,8 @@ public static InputType convolutional(int height, int width, int depth) { * @param channels Number of channels of the input * @return InputTypeConvolutional3D */ - public static InputType convolutional3D(int height, int width, int depth, int channels) { - return new InputTypeConvolutional3D(height, width, depth, channels); + public static InputType convolutional3D(int depth, int height, int width, int channels) { + return new InputTypeConvolutional3D(depth, height, width, channels); } /** @@ -250,9 +250,9 @@ public int arrayElementsPerExample() { @EqualsAndHashCode(callSuper = false) @NoArgsConstructor public static class InputTypeConvolutional3D extends InputType { + private int depth; private int height; private int width; - private int depth; private int channels; @Override @@ -262,7 +262,7 @@ public Type getType() { @Override public String toString() { - return "InputTypeConvolutional3D(h=" + height + ",w=" + width + ",d=" + depth + ",c=" + channels + ")"; + return "InputTypeConvolutional3D(d=" + depth + ",h=" + height + ",w=" + width + ",c=" + channels + ")"; } @Override @@ -315,10 +315,10 @@ public static InputType inferInputType(INDArray inputArray) { case 3: return InputType.recurrent(inputArray.size(1), inputArray.size(2)); case 4: - //Order: [minibatch, channels, height, width] -> [h, w, d] + //Order: [minibatch, channels, height, width] -> [h, w, c] return InputType.convolutional(inputArray.size(2), inputArray.size(3), inputArray.size(1)); case 5: - //Order: [minibatch, channels, height, width, channels] -> [h, w, d, c] + //Order: [minibatch, channels, depth, height, width] -> [d, h, w, c] return InputType.convolutional3D(inputArray.size(2), inputArray.size(3), inputArray.size(4), inputArray.size(1)); default: diff --git 
a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java index 5a0d6fa0cc2c..6a2f2cdcb2c3 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java @@ -38,7 +38,7 @@ @EqualsAndHashCode(callSuper = true) public abstract class BaseUpsamplingLayer extends Layer { - protected int size; + protected int[] size; protected BaseUpsamplingLayer(UpsamplingBuilder builder) { super(builder); @@ -92,9 +92,24 @@ public boolean isPretrainParam(String paramName) { @NoArgsConstructor protected static abstract class UpsamplingBuilder> extends Layer.Builder { - protected int size = 1; + protected int[] size = new int[] {1}; + /** + * A single size integer is used for upsampling in all spatial dimensions + * + * @param size int for upsampling + */ protected UpsamplingBuilder(int size) { + this.size = new int[] {size}; + } + + /** + * An int array to specify upsampling dimensions, the length of which has to equal the number of + * spatial dimensions (e.g. 2 for Upsampling2D etc.) + * + * @param size int array for upsampling + */ + protected UpsamplingBuilder(int[] size) { this.size = size; } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/InputTypeUtil.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/InputTypeUtil.java index 37e1bd22ab69..9ba1bf3fc3ea 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/InputTypeUtil.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/InputTypeUtil.java @@ -85,33 +85,33 @@ public static InputType getOutputTypeCnn3DLayers(InputType inputType, int[] kern } InputType.InputTypeConvolutional3D i = (InputType.InputTypeConvolutional3D) inputType; + int inDepth = i.getDepth(); int inHeight = i.getHeight(); int inWidth = i.getWidth(); - int inDepth = i.getDepth(); - int padH = (padding == null ? 0 : padding[0]); - int padW = (padding == null ? 0 : padding[1]); - int padD = (padding == null ? 0 : padding[2]); + int padD = (padding == null ? 0 : padding[0]); + int padH = (padding == null ? 0 : padding[1]); + int padW = (padding == null ? 0 : padding[2]);
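+ // Note: for CNN3D layers the kernelSize, stride and padding arrays are all ordered
+ // [depth, height, width], matching the NCDHW layout of the activations.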
- int kH = kernelSize[0]; - int kW = kernelSize[1]; - int kD = kernelSize[2]; + int kD = kernelSize[0]; + int kH = kernelSize[1]; + int kW = kernelSize[2]; if(dilation[0] != 1){ //Use *effective* kernel size, accounting for dilation - kH = kH + (kH-1)*(dilation[0]-1); + kD = kD + (kD-1)*(dilation[0]-1); } if(dilation[1] != 1){ - kW = kW + (kW-1)*(dilation[1]-1); + kH = kH + (kH-1)*(dilation[1]-1); } if(dilation[2] != 1){ - kD = kD + (kD-1)*(dilation[2]-1); + kW = kW + (kW-1)*(dilation[2]-1); } - int sH = stride[0]; - int sW = stride[1]; - int sD = stride[1]; + int sD = stride[0]; + int sH = stride[1]; + int sW = stride[2]; if (sH <= 0 || sW <= 0 || sD <= 0) { throw new DL4JInvalidConfigException(getConfigErrorCommonLine1(layerIdx, layerName, layerClass, sH <= 0) @@ -195,17 +195,17 @@ public static InputType getOutputTypeCnn3DLayers(InputType inputType, int[] kern } } else if (convolutionMode == ConvolutionMode.Same) { + int outD = (int) Math.ceil(inDepth / ((double) sD)); int outH = (int) Math.ceil(inHeight / ((double) sH)); int outW = (int) Math.ceil(inWidth / ((double) sW)); - int outD = (int) Math.ceil(inDepth / ((double) sD)); - return InputType.convolutional3D(outH, outW, outD, outputChannels); + return InputType.convolutional3D(outD, outH, outW, outputChannels); } + int dOut = (inDepth - kD + 2 * padD) / sD + 1; int hOut = (inHeight - kH + 2 * padH) / sH + 1; int wOut = (inWidth - kW + 2 * padW) / sW + 1; - int dOut = (inDepth - kD + 2 * padD) / sD + 1; - return InputType.convolutional3D(hOut, wOut, dOut, outputChannels); + return InputType.convolutional3D(dOut, hOut, wOut, outputChannels); } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java new file mode 100644 index 000000000000..69c16d3187e4 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java @@ -0,0 +1,364 @@ +package org.deeplearning4j.nn.conf.layers; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import lombok.ToString; +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.conf.memory.MemoryReport; +import org.deeplearning4j.nn.params.EmptyParamInitializer; +import org.deeplearning4j.optimize.api.IterationListener; +import org.deeplearning4j.optimize.api.TrainingListener; +import org.deeplearning4j.util.Convolution3DUtils; +import org.deeplearning4j.util.ConvolutionUtils; +import org.nd4j.linalg.api.ndarray.INDArray; + +import java.util.Collection; +import java.util.Map; + +/** + * 3D subsampling / pooling layer for convolutional neural networks + *

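+ * Expects CNN3D input in NCDHW format, i.e. activations of shape [minibatch, channels, depth, height, width].
+ * A minimal configuration sketch (illustrative values, mirroring the gradient-check tests):
+ * {@code new Subsampling3DLayer.Builder(PoolingType.AVG).kernelSize(2, 2, 2).stride(2, 2, 2).build()}
+ *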
+ * Supports max and average pooling modes + * + * @author Max Pumperla + */ + +@Data +@NoArgsConstructor +@ToString(callSuper = true) +@EqualsAndHashCode(callSuper = true) +public class Subsampling3DLayer extends Layer { + + protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate; + protected org.deeplearning4j.nn.conf.layers.PoolingType poolingType; + protected int[] kernelSize; + protected int[] stride; + protected int[] padding; + protected boolean cudnnAllowFallback = true; + + public enum PoolingType { + MAX, AVG; + + public org.deeplearning4j.nn.conf.layers.PoolingType toPoolingType() { + switch (this) { + case MAX: + return org.deeplearning4j.nn.conf.layers.PoolingType.MAX; + case AVG: + return org.deeplearning4j.nn.conf.layers.PoolingType.AVG; + } + throw new UnsupportedOperationException("Unknown/not supported pooling type: " + this); + } + } + + protected Subsampling3DLayer(BaseSubsamplingBuilder builder) { + super(builder); + this.poolingType = builder.poolingType; + if (builder.kernelSize.length != 3) + throw new IllegalArgumentException("Kernel size must be length 3"); + this.kernelSize = builder.kernelSize; + if (builder.stride.length != 3) + throw new IllegalArgumentException("Invalid stride, must be length 3"); + this.stride = builder.stride; + this.padding = builder.padding; + this.convolutionMode = builder.convolutionMode; + this.cudnnAllowFallback = builder.cudnnAllowFallback; + } + + @Override + public Subsampling3DLayer clone() { + Subsampling3DLayer clone = (Subsampling3DLayer) super.clone(); + + if (clone.kernelSize != null) + clone.kernelSize = clone.kernelSize.clone(); + if (clone.stride != null) + clone.stride = clone.stride.clone(); + if (clone.padding != null) + clone.padding = clone.padding.clone(); + return clone; + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, + Collection iterationListeners, + int layerIndex, INDArray layerParamsView, + boolean initializeParams) { + org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling3DLayer ret = + new org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling3DLayer(conf); + ret.setListeners(iterationListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setConf(conf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { + throw new IllegalStateException("Invalid input for Subsampling 3D layer (layer name=\"" + getLayerName() + + "\"): Expected CNN input, got " + inputType); + } + + return InputTypeUtil.getOutputTypeCnn3DLayers(inputType, kernelSize, stride, padding, + new int[]{1, 1, 1}, // no dilation + convolutionMode, + ((InputType.InputTypeConvolutional3D) inputType).getChannels(), layerIndex, getLayerName(), + Subsampling3DLayer.class); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + //No op: subsampling layer doesn't have nIn value + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException("Invalid input for Subsampling 3D layer (layer name=\"" + getLayerName() + + "\"): input is null"); + } + + return 
InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getLayerName()); + } + + @Override + public double getL1ByParam(String paramName) { + //Not applicable + return 0; + } + + @Override + public double getL2ByParam(String paramName) { + //Not applicable + return 0; + } + + @Override + public boolean isPretrainParam(String paramName) { + throw new UnsupportedOperationException("Subsampling3DLayer does not contain parameters"); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType; + InputType.InputTypeConvolutional3D outputType = (InputType.InputTypeConvolutional3D) getOutputType(-1, inputType); + int actElementsPerEx = outputType.arrayElementsPerExample(); + + + //During forward pass: im2col array + reduce. Reduce is counted as activations, so only im2col is working mem + int im2colSizePerEx = + c.getChannels() * outputType.getHeight() * outputType.getWidth() * outputType.getDepth() + * kernelSize[0] * kernelSize[1] * kernelSize[2]; + + //Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass + int trainingWorkingSizePerEx = im2colSizePerEx; + if (getIDropout() != null) { + //Dup on the input before dropout, but only for training + trainingWorkingSizePerEx += inputType.arrayElementsPerExample(); + } + + return new LayerMemoryReport.Builder(layerName, Subsampling3DLayer.class, inputType, outputType) + .standardMemory(0, 0) //No params + .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx) + .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching + .build(); + } + + @NoArgsConstructor + public static class Builder extends BaseSubsamplingBuilder { + + + public Builder(PoolingType poolingType, int[] kernelSize, int[] stride) { + super(poolingType, kernelSize, stride); + } + + public Builder(PoolingType poolingType, int[] kernelSize) { + super(poolingType, kernelSize); + } + + public Builder(PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) { + super(poolingType, kernelSize, stride, padding); + } + + public Builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) { + super(poolingType, kernelSize); + } + + public Builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride, + int[] padding) { + super(poolingType, kernelSize, stride, padding); + } + + public Builder(int[] kernelSize, int[] stride, int[] padding) { + super(kernelSize, stride, padding); + } + + public Builder(int[] kernelSize, int[] stride) { + super(kernelSize, stride); + } + + public Builder(int... kernelSize) { + super(kernelSize); + } + + public Builder(PoolingType poolingType) { + super(poolingType); + } + + public Builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) { + super(poolingType); + } + + /** + * Kernel size + * + * @param kernelSize kernel size in depth, height and width dimensions + */ + public Builder kernelSize(int... kernelSize) { + if (kernelSize.length != 3) + throw new IllegalArgumentException("Invalid input: must be length 3"); + this.kernelSize = kernelSize; + return this; + }
+ + /** + * Stride + * + * @param stride stride in depth, height and width dimensions + */ + public Builder stride(int... stride) { + if (stride.length != 3) + throw new IllegalArgumentException("Invalid input: must be length 3"); + this.stride = stride; + return this; + } + + /** + * Padding + * + * @param padding padding in the depth, height and width dimensions + */ + public Builder padding(int... padding) { + if (padding.length != 3) + throw new IllegalArgumentException("Invalid input: must be length 3"); + this.padding = padding; + return this; + } + + + @Override + @SuppressWarnings("unchecked") + public Subsampling3DLayer build() { + ConvolutionUtils.validateConvolutionModePadding(convolutionMode, padding); + Convolution3DUtils.validateCnn3DKernelStridePadding(kernelSize, stride, padding); + return new Subsampling3DLayer(this); + } + } + + @NoArgsConstructor + protected static abstract class BaseSubsamplingBuilder> + extends Layer.Builder { + protected org.deeplearning4j.nn.conf.layers.PoolingType poolingType = + org.deeplearning4j.nn.conf.layers.PoolingType.MAX; + protected int[] kernelSize = new int[]{1, 1, 1}; + protected int[] stride = new int[]{2, 2, 2}; + protected int[] padding = new int[]{0, 0, 0}; + protected ConvolutionMode convolutionMode = null; + protected boolean cudnnAllowFallback = true; + + protected BaseSubsamplingBuilder(PoolingType poolingType, int[] kernelSize, int[] stride) { + this.poolingType = poolingType.toPoolingType(); + this.kernelSize = kernelSize; + this.stride = stride; + } + + protected BaseSubsamplingBuilder(PoolingType poolingType, int[] kernelSize) { + this.poolingType = poolingType.toPoolingType(); + this.kernelSize = kernelSize; + } + + protected BaseSubsamplingBuilder(PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) { + this.poolingType = poolingType.toPoolingType(); + this.kernelSize = kernelSize; + this.stride = stride; + this.padding = padding; + } + + protected BaseSubsamplingBuilder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) { + this.poolingType = poolingType; + this.kernelSize = kernelSize; + } + + protected BaseSubsamplingBuilder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, + int[] stride, int[] padding) { + this.poolingType = poolingType; + this.kernelSize = kernelSize; + this.stride = stride; + this.padding = padding; + } + + protected BaseSubsamplingBuilder(int[] kernelSize, int[] stride, int[] padding) { + this.kernelSize = kernelSize; + this.stride = stride; + this.padding = padding; + } + + protected BaseSubsamplingBuilder(int[] kernelSize, int[] stride) { + this.kernelSize = kernelSize; + this.stride = stride; + } + + protected BaseSubsamplingBuilder(int... kernelSize) { + this.kernelSize = kernelSize; + } + + protected BaseSubsamplingBuilder(PoolingType poolingType) { + this.poolingType = poolingType.toPoolingType(); + } + + protected BaseSubsamplingBuilder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) { + this.poolingType = poolingType; + } + + /** + * Set the convolution mode for the layer. + * See {@link ConvolutionMode} for more details + * + * @param convolutionMode Convolution mode for layer + */ + public T convolutionMode(ConvolutionMode convolutionMode) { + this.convolutionMode = convolutionMode; + return (T) this; + } + + public T poolingType(PoolingType poolingType) { + this.poolingType = poolingType.toPoolingType(); + return (T) this; + } + + /** + * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementation be allowed? 
+ * If set to false, an exception in CuDNN will be propagated back to the user. If true, the built-in (non-CuDNN) + * implementation for Subsampling3DLayer will be used + * + * @param allowFallback Whether fallback to non-CuDNN implementation should be used + */ + public T cudnnAllowFallback(boolean allowFallback) { + this.cudnnAllowFallback = allowFallback; + return (T) this; + } + } + +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java index 15bddf965756..4d20a03ead54 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java @@ -27,6 +27,7 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.base.Preconditions; import org.nd4j.linalg.api.ndarray.INDArray; import java.util.Collection; @@ -44,7 +45,7 @@ @EqualsAndHashCode(callSuper = true) public class Upsampling1D extends BaseUpsamplingLayer { - protected int size; + protected int[] size; protected Upsampling1D(UpsamplingBuilder builder) { super(builder); @@ -97,7 +98,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) inputType; InputType.InputTypeRecurrent outputType = (InputType.InputTypeRecurrent) getOutputType(-1, inputType); - int im2colSizePerEx = recurrent.getSize() * outputType.getTimeSeriesLength() * size; + int im2colSizePerEx = recurrent.getSize() * outputType.getTimeSeriesLength() * size[0]; int trainingWorkingSizePerEx = im2colSizePerEx; if (getIDropout() != null) { trainingWorkingSizePerEx += inputType.arrayElementsPerExample(); @@ -114,17 +115,28 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { public static class Builder extends UpsamplingBuilder { public Builder(int size) { - super(size); + super(new int[] {size, size}); } /** - * Upsampling size + * Upsampling size int * - * @param size upsampling size in height and width dimensions + * @param size upsampling size in the single spatial dimension of this 1D layer */ public Builder size(int size) { - this.size = size; + this.size = new int[] {size, size}; + return this; + } + + /** + * Upsampling size int array with a single element + * + * @param size upsampling size in the single spatial dimension of this 1D layer + */ + public Builder size(int[] size) { + Preconditions.checkArgument(size.length == 1); + this.size = new int[] {size[0], size[0]}; // Since this is 2D under the hood, we need to hide this. 
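+ // For example, size(new int[]{3}) is stored as {3, 3}: the duplicated entry exists only to satisfy
+ // the 2D superclass contract, while callers see a single upsampling factor for the one spatial dimension.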
return this; } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java index 0713fb61f793..3e2cb8d99028 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java @@ -27,6 +27,7 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.base.Preconditions; import org.nd4j.linalg.api.ndarray.INDArray; import java.util.Collection; @@ -44,7 +45,7 @@ @EqualsAndHashCode(callSuper = true) public class Upsampling2D extends BaseUpsamplingLayer { - protected int size; + protected int[] size; protected Upsampling2D(UpsamplingBuilder builder) { super(builder); @@ -59,10 +60,10 @@ public Upsampling2D clone() { @Override public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams) { + Collection trainingListeners, int layerIndex, INDArray layerParamsView, + boolean initializeParams) { org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling2D ret = - new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling2D(conf); + new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling2D(conf); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); @@ -75,22 +76,22 @@ public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, @Override public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalStateException("Invalid input for Subsampling layer (layer name=\"" + getLayerName() - + "\"): Expected CNN input, got " + inputType); + throw new IllegalStateException("Invalid input for Upsampling 2D layer (layer name=\"" + getLayerName() + + "\"): Expected CNN input, got " + inputType); } InputType.InputTypeConvolutional i = (InputType.InputTypeConvolutional) inputType; int inHeight = i.getHeight(); int inWidth = i.getWidth(); int inDepth = i.getChannels(); - return InputType.convolutional(size * inHeight, size * inWidth, inDepth); + return InputType.convolutional(size[0] * inHeight, size[1] * inWidth, inDepth); } @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { if (inputType == null) { - throw new IllegalStateException("Invalid input for Upsampling layer (layer name=\"" + getLayerName() - + "\"): input is null"); + throw new IllegalStateException("Invalid input for Upsampling 2D layer (layer name=\"" + getLayerName() + + "\"): input is null"); } return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); } @@ -101,7 +102,8 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { InputType.InputTypeConvolutional outputType = (InputType.InputTypeConvolutional) getOutputType(-1, inputType); // During forward pass: im2col array + reduce. 
Reduce is counted as activations, so only im2col is working mem - int im2colSizePerEx = c.getChannels() * outputType.getHeight() * outputType.getWidth() * size; + int im2colSizePerEx = c.getChannels() * outputType.getHeight() * outputType.getWidth() + * size[0] * size[1]; // Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass int trainingWorkingSizePerEx = im2colSizePerEx; @@ -111,10 +113,10 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { } return new LayerMemoryReport.Builder(layerName, Upsampling2D.class, inputType, outputType) - .standardMemory(0, 0) //No params - .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx) - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); + .standardMemory(0, 0) //No params + .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx) + .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching + .build(); } @@ -122,16 +124,29 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { public static class Builder extends UpsamplingBuilder { public Builder(int size) { - super(size); + super(new int[]{size, size}); } /** - * Upsampling size + * Upsampling size int, used for both height and width * - * @param size upsampling size in height and width dimensions + * @param size upsampling size in height and width dimensions */ public Builder size(int size) { + this.size = new int[]{size, size}; + return this; + } + + + /** + * Upsampling size array + * + * @param size upsampling size in height and width dimensions + */ + public Builder size(int[] size) { + Preconditions.checkArgument(size.length == 2); + this.size = size; return this; } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java new file mode 100644 index 000000000000..cccd5c655588 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java @@ -0,0 +1,163 @@ +/*- + * + * * Copyright 2017 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. 
+ * + */ +package org.deeplearning4j.nn.conf.layers; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import lombok.ToString; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.conf.memory.MemoryReport; +import org.deeplearning4j.optimize.api.IterationListener; +import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.base.Preconditions; +import org.nd4j.linalg.api.ndarray.INDArray; + +import java.util.Collection; +import java.util.Map; + +/** + * Upsampling 3D layer + * + * @author Max Pumperla + */ + +@Data +@NoArgsConstructor +@ToString(callSuper = true) +@EqualsAndHashCode(callSuper = true) +public class Upsampling3D extends BaseUpsamplingLayer { + + protected int[] size; + + protected Upsampling3D(UpsamplingBuilder builder) { + super(builder); + this.size = builder.size; + } + + @Override + public Upsampling3D clone() { + return (Upsampling3D) super.clone(); + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, + Collection iterationListeners, + int layerIndex, INDArray layerParamsView, + boolean initializeParams) { + org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling3D ret = + new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling3D(conf); + ret.setListeners(iterationListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setConf(conf); + return ret; + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { + throw new IllegalStateException("Invalid input for Upsampling 3D layer (layer name=\"" + getLayerName() + + "\"): Expected CNN3D input, got " + inputType); + } + InputType.InputTypeConvolutional3D i = (InputType.InputTypeConvolutional3D) inputType; + int inHeight = i.getHeight(); + int inWidth = i.getWidth(); + int inDepth = i.getDepth(); + int inChannels = i.getChannels(); + + return InputType.convolutional3D( + size[0] * inDepth, size[1] * inHeight, size[2] * inWidth, inChannels); + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException("Invalid input for Upsampling 3D layer (layer name=\"" + getLayerName() + + "\"): input is null"); + } + return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getLayerName()); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType; + InputType.InputTypeConvolutional3D outputType = + (InputType.InputTypeConvolutional3D) getOutputType(-1, inputType); + + // During forward pass: im2col array + reduce. Reduce is counted as activations, so only im2col is working mem + int im2colSizePerEx = c.getChannels() * outputType.getDepth() * outputType.getHeight() + * outputType.getWidth() * size[0] * size[1] * size[2]; + + // Current implementation does NOT cache im2col etc... 
which means: it's recalculated on each backward pass + int trainingWorkingSizePerEx = im2colSizePerEx; + if (getIDropout() != null) { + //Dup on the input before dropout, but only for training + trainingWorkingSizePerEx += inputType.arrayElementsPerExample(); + } + + return new LayerMemoryReport.Builder(layerName, Upsampling3D.class, inputType, outputType) + .standardMemory(0, 0) //No params + .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx) + .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching + .build(); + } + + + @NoArgsConstructor + public static class Builder extends UpsamplingBuilder { + + public Builder(int size) { + super(new int[] {size, size, size}); + } + + /** + * Upsampling size as int, so same upsampling size is used for depth, width and height + * + * @param size upsampling size in depth, height and width dimensions + */ + public Builder size(int size) { + + this.size = new int[] {size, size, size}; + return this; + } + + /** + * Upsampling size as an int array of length 3, so depth, height and width can each use a + * different upsampling size + * + * @param size upsampling size in depth, height and width dimensions + */ + public Builder size(int[] size) { + Preconditions.checkArgument(size.length == 3); + this.size = size; + return this; + } + + @Override + @SuppressWarnings("unchecked") + public Upsampling3D build() { + return new Upsampling3D(this); + } + } + +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java new file mode 100644 index 000000000000..cd9e99c5df70 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java @@ -0,0 +1,196 @@ +/*- + * + * * Copyright 2017 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ +package org.deeplearning4j.nn.conf.layers; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.conf.memory.MemoryReport; +import org.deeplearning4j.nn.params.EmptyParamInitializer; +import org.deeplearning4j.optimize.api.IterationListener; +import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.linalg.api.ndarray.INDArray; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Map; + +/** + * Zero padding 3D layer for convolutional neural networks. + * Allows padding to be done separately for "left" and "right" + * in all three spatial dimensions. 
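+ * <p>
+ * For example, padding of {1, 1, 2, 2, 3, 3} (the configuration exercised in the gradient-check tests)
+ * grows an input of shape [minibatch, channels, 4, 4, 4] to [minibatch, channels, 6, 8, 10], since each
+ * spatial dimension is extended by its left plus right padding.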
+ * + * @author Max Pumperla + */ +@Data +@NoArgsConstructor +@EqualsAndHashCode(callSuper = true) +public class ZeroPadding3DLayer extends Layer { + + private int[] padding; // [padLeftD, padRightD, padLeftH, padRightH, padLeftW, padRightW] + + private ZeroPadding3DLayer(Builder builder) { + super(builder); + this.padding = builder.padding; + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, + Collection iterationListeners, + int layerIndex, INDArray layerParamsView, + boolean initializeParams) { + org.deeplearning4j.nn.layers.convolution.ZeroPadding3DLayer ret = + new org.deeplearning4j.nn.layers.convolution.ZeroPadding3DLayer(conf); + ret.setListeners(iterationListeners); + ret.setIndex(layerIndex); + Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setConf(conf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { + throw new IllegalStateException("Invalid input for 3D CNN layer (layer index = " + layerIndex + + ", layer name = \"" + getLayerName() + "\"): expect CNN3D input type with size > 0. Got: " + + inputType); + } + InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType; + return InputType.convolutional3D( + c.getDepth() + padding[0] + padding[1], + c.getHeight() + padding[2] + padding[3], + c.getWidth() + padding[4] + padding[5], + c.getChannels()); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + //No op + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException("Invalid input for ZeroPadding3DLayer layer (layer name=\"" + getLayerName() + + "\"): input is null"); + } + + return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getLayerName()); + } + + @Override + public double getL1ByParam(String paramName) { + return 0; + } + + @Override + public double getL2ByParam(String paramName) { + return 0; + } + + @Override + public boolean isPretrainParam(String paramName) { + throw new UnsupportedOperationException("ZeroPadding3DLayer does not contain parameters"); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + InputType outputType = getOutputType(-1, inputType); + + return new LayerMemoryReport.Builder(layerName, ZeroPadding3DLayer.class, inputType, outputType) + .standardMemory(0, 0) //No params + .workingMemory(0, 0, MemoryReport.CACHE_MODE_ALL_ZEROS, + MemoryReport.CACHE_MODE_ALL_ZEROS) + .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching + .build(); + } + + public static class Builder extends Layer.Builder { + + private int[] padding = new int[]{0, 0, 0, 0, 0, 0}; // [padLeftD, padRightD, padLeftH, padRightH, padLeftW, padRightW] + + /** + * @param padding Padding for both the left and right in all three spatial dimensions + */ + public Builder(int padding) { + this(padding, padding, padding, padding, padding, padding); + } + + + /** + * Use same padding for left and right boundaries in depth, height and width. 
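+ * For example, {@code new ZeroPadding3DLayer.Builder(1, 2, 3)} is equivalent to the explicit
+ * padding {1, 1, 2, 2, 3, 3}.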
+ * + * @param padDepth padding used for both depth boundaries + * @param padHeight padding used for both height boundaries + * @param padWidth padding used for both width boundaries + */ + public Builder(int padDepth, int padHeight, int padWidth) { + this(padDepth, padDepth, padHeight, padHeight, padWidth, padWidth); + } + + /** + * Explicit padding of left and right boundaries in depth, height and width dimensions + * + * @param padLeftD Depth padding left + * @param padRightD Depth padding right + * @param padLeftH Height padding left + * @param padRightH Height padding right + * @param padLeftW Width padding left + * @param padRightW Width padding right + */ + public Builder(int padLeftD, int padRightD, + int padLeftH, int padRightH, + int padLeftW, int padRightW) { + this(new int[]{padLeftD, padRightD, padLeftH, padRightH, padLeftW, padRightW}); + } + + public Builder(int[] padding) { + if (padding.length == 3) { + this.padding = new int[]{padding[0], padding[0], padding[1], padding[1], padding[2], padding[2]}; + } else if (padding.length == 6) { + this.padding = padding; + } else if (padding.length == 1) { + this.padding = new int[]{padding[0], padding[0], padding[0], padding[0], padding[0], padding[0]}; + } else { + throw new IllegalStateException("Padding length has to be either 1, 3 or 6, got " + padding.length); + } + } + + @Override + @SuppressWarnings("unchecked") + public ZeroPadding3DLayer build() { + for (int p : padding) { + if (p < 0) + throw new IllegalStateException("Invalid zero padding layer config: padding [left, right]" + + " must be >= 0 for all elements. Got: " + Arrays.toString(padding)); + } + return new ZeroPadding3DLayer(this); + } + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java index 3ef37555a924..71fc58f12800 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java @@ -38,26 +38,25 @@ public class Cropping2D extends NoParamLayer { * @param cropTopBottom Amount of cropping to apply to both the top and the bottom of the input activations * @param cropLeftRight Amount of cropping to apply to both the left and the right of the input activations */ - public Cropping2D(int cropTopBottom, int cropLeftRight){ + public Cropping2D(int cropTopBottom, int cropLeftRight) { this(cropTopBottom, cropTopBottom, cropLeftRight, cropLeftRight); } /** - * * @param cropTop Amount of cropping to apply to the top of the input activations * @param cropBottom Amount of cropping to apply to the bottom of the input activations * @param cropLeft Amount of cropping to apply to the left of the input activations * @param cropRight Amount of cropping to apply to the right of the input activations */ - public Cropping2D(int cropTop, int cropBottom, int cropLeft, int cropRight){ + public Cropping2D(int cropTop, int cropBottom, int cropLeft, int cropRight) { this(new Builder(cropTop, cropBottom, cropLeft, cropRight)); } - public Cropping2D(int[] cropping){ + public Cropping2D(int[] cropping) { this(new Builder(cropping)); } - protected Cropping2D(Builder builder){ + protected Cropping2D(Builder builder) { super(builder); this.cropping = builder.cropping; } @@ -100,19 +99,19 @@ public static class Builder extends Layer.Builder { private int[] cropping = new int[]{0, 0, 0, 0}; - 
public Builder(){ + public Builder() { } /** * @param cropping Cropping amount for top/bottom/left/right (in that order). Must be length 4 array. */ - public Builder(@NonNull int[] cropping){ + public Builder(@NonNull int[] cropping) { Preconditions.checkArgument(cropping.length == 4 || cropping.length == 2, "Either 2 or 4 cropping values, i.e. (top/bottom. left/right) or (top, bottom," + " left, right) must be provided. Got " + cropping.length + " values: " + Arrays.toString(cropping)); if (cropping.length == 2) { - this.cropping = new int[] {cropping[0], cropping[0], cropping[1], cropping[1]}; + this.cropping = new int[]{cropping[0], cropping[0], cropping[1], cropping[1]}; } else { this.cropping = cropping; } @@ -122,24 +121,23 @@ public Builder(@NonNull int[] cropping){ * @param cropTopBottom Amount of cropping to apply to both the top and the bottom of the input activations * @param cropLeftRight Amount of cropping to apply to both the left and the right of the input activations */ - public Builder(int cropTopBottom, int cropLeftRight){ + public Builder(int cropTopBottom, int cropLeftRight) { this(cropTopBottom, cropTopBottom, cropLeftRight, cropLeftRight); } /** - * * @param cropTop Amount of cropping to apply to the top of the input activations * @param cropBottom Amount of cropping to apply to the bottom of the input activations * @param cropLeft Amount of cropping to apply to the left of the input activations * @param cropRight Amount of cropping to apply to the right of the input activations */ - public Builder(int cropTop, int cropBottom, int cropLeft, int cropRight){ + public Builder(int cropTop, int cropBottom, int cropLeft, int cropRight) { this.cropping = new int[]{cropTop, cropBottom, cropLeft, cropRight}; Preconditions.checkArgument(cropTop >= 0 && cropBottom >= 0 && cropLeft >= 0 && cropRight >= 0, "Invalid arguments: crop dimensions must be > 0. Got [t,b,l,r] = " + Arrays.toString(this.cropping)); } - public Cropping2D build(){ + public Cropping2D build() { return new Cropping2D(this); } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java new file mode 100644 index 000000000000..23c3cb56f55e --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java @@ -0,0 +1,165 @@ +package org.deeplearning4j.nn.conf.layers.convolutional; + +import com.google.common.base.Preconditions; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import lombok.NonNull; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.InputTypeUtil; +import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.NoParamLayer; +import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.layers.convolution.Cropping2DLayer; +import org.deeplearning4j.nn.layers.convolution.Cropping3DLayer; +import org.deeplearning4j.optimize.api.IterationListener; +import org.deeplearning4j.optimize.api.TrainingListener; +import org.deeplearning4j.util.ConvolutionUtils; +import org.nd4j.linalg.api.ndarray.INDArray; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Map; + +/** + * Cropping layer for convolutional (3d) neural networks. 
+ * Allows cropping to be done separately for upper and lower bounds of + * depth, height and width dimensions. + * + * @author Max Pumperla + */ +@Data +@NoArgsConstructor +@EqualsAndHashCode(callSuper = true) +public class Cropping3D extends NoParamLayer { + + private int[] cropping; + + /** + * @param cropDepth Amount of cropping to apply to both depth boundaries of the input activations + * @param cropHeight Amount of cropping to apply to both height boundaries of the input activations + * @param cropWidth Amount of cropping to apply to both width boundaries of the input activations + */ + public Cropping3D(int cropDepth, int cropHeight, int cropWidth) { + this(cropDepth, cropDepth, cropHeight, cropHeight, cropWidth, cropWidth); + } + + /** + * @param cropLeftD Amount of cropping to apply to the left of the depth dimension + * @param cropRightD Amount of cropping to apply to the right of the depth dimension + * @param cropLeftH Amount of cropping to apply to the left of the height dimension + * @param cropRightH Amount of cropping to apply to the right of the height dimension + * @param cropLeftW Amount of cropping to apply to the left of the width dimension + * @param cropRightW Amount of cropping to apply to the right of the width dimension + */ + public Cropping3D(int cropLeftD, int cropRightD, int cropLeftH, int cropRightH, int cropLeftW, int cropRightW) { + this(new Builder(cropLeftD, cropRightD, cropLeftH, cropRightH, cropLeftW, cropRightW)); + } + + public Cropping3D(int[] cropping) { + this(new Builder(cropping)); + } + + protected Cropping3D(Builder builder) { + super(builder); + this.cropping = builder.cropping; + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, + Collection iterationListeners, + int layerIndex, INDArray layerParamsView, + boolean initializeParams) { + Cropping3DLayer ret = new Cropping3DLayer(conf); + ret.setListeners(iterationListeners); + ret.setIndex(layerIndex); + Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setConf(conf); + return ret; + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { + throw new IllegalStateException("Invalid input for 3D cropping layer (layer index = " + layerIndex + + ", layer name = \"" + getLayerName() + "\"): expect CNN3D input type with size > 0. Got: " + + inputType); + } + InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType; + return InputType.convolutional3D( + c.getDepth() - cropping[0] - cropping[1], + c.getHeight() - cropping[2] - cropping[3], + c.getWidth() - cropping[4] - cropping[5], + c.getChannels()); + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + Preconditions.checkArgument(inputType != null, "Invalid input for Cropping3D " + + "layer (layer name=\"" + getLayerName() + "\"): InputType is null"); + return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getLayerName()); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + return null; + } + + + public static class Builder extends Layer.Builder { + + private int[] cropping = new int[]{0, 0, 0, 0, 0, 0}; + + public Builder() { + + } + + /** + * @param cropping Cropping amount, must be length 3 or 6 array, i.e. 
either + * (crop depth, crop height, crop width), each applied to both boundaries of its dimension, or + * (crop left depth, crop right depth, crop left height, crop right height, crop left width, + * crop right width) + */ + public Builder(@NonNull int[] cropping) { + Preconditions.checkArgument(cropping.length == 6 || cropping.length == 3, + "Either 3 or 6 cropping values must be provided. Got " + + cropping.length + " values: " + Arrays.toString(cropping)); + if (cropping.length == 3) { + this.cropping = new int[]{cropping[0], cropping[0], cropping[1], cropping[1], cropping[2], cropping[2]}; + } else { + this.cropping = cropping; + } + } + + /** + * @param cropDepth Amount of cropping to apply to both depth boundaries of the input activations + * @param cropHeight Amount of cropping to apply to both height boundaries of the input activations + * @param cropWidth Amount of cropping to apply to both width boundaries of the input activations + */ + public Builder(int cropDepth, int cropHeight, int cropWidth) { + this(cropDepth, cropDepth, cropHeight, cropHeight, cropWidth, cropWidth); + } + + /** + * @param cropLeftD Amount of cropping to apply to the left of the depth dimension + * @param cropRightD Amount of cropping to apply to the right of the depth dimension + * @param cropLeftH Amount of cropping to apply to the left of the height dimension + * @param cropRightH Amount of cropping to apply to the right of the height dimension + * @param cropLeftW Amount of cropping to apply to the left of the width dimension + * @param cropRightW Amount of cropping to apply to the right of the width dimension + */ + public Builder(int cropLeftD, int cropRightD, int cropLeftH, int cropRightH, int cropLeftW, int cropRightW) { + this.cropping = new int[]{cropLeftD, cropRightD, cropLeftH, cropRightH, cropLeftW, cropRightW}; + Preconditions.checkArgument(cropLeftD >= 0 && cropLeftH >= 0 && cropLeftW >= 0 + && cropRightD >= 0 && cropRightH >= 0 && cropRightW >= 0, + "Invalid arguments: crop dimensions must be >= 0.
Got " + Arrays.toString(this.cropping)); + } + + public Cropping3D build() { + return new Cropping3D(this); + } + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnn3DPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnn3DPreProcessor.java index 635c934f7f5e..67dcd80d74bd 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnn3DPreProcessor.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnn3DPreProcessor.java @@ -150,7 +150,7 @@ public InputType getOutputType(InputType inputType) { + " = (d=" + numChannels + " * w=" + inputWidth + " * h=" + inputHeight + "), got " + inputType); } - return InputType.convolutional3D(inputHeight, inputWidth, inputDepth, numChannels); + return InputType.convolutional3D(inputDepth, inputHeight, inputWidth, numChannels); case CNN: InputType.InputTypeConvolutional c2 = (InputType.InputTypeConvolutional) inputType; @@ -159,7 +159,7 @@ public InputType getOutputType(InputType inputType) { + "," + c2.getWidth() + "," + c2.getHeight() + ") but expected (" + numChannels + "," + inputHeight + "," + inputWidth + ")"); } - return InputType.convolutional3D(c2.getHeight(), c2.getWidth(), 1, c2.getChannels()); + return InputType.convolutional3D(1, c2.getHeight(), c2.getWidth(), c2.getChannels()); case CNN3D: InputType.InputTypeConvolutional3D c3 = (InputType.InputTypeConvolutional3D) inputType; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping3DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping3DLayer.java new file mode 100644 index 000000000000..1dea0dbec6d2 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping3DLayer.java @@ -0,0 +1,88 @@ +package org.deeplearning4j.nn.layers.convolution; + +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.gradient.DefaultGradient; +import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.layers.AbstractLayer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; + +import static org.nd4j.linalg.indexing.NDArrayIndex.all; +import static org.nd4j.linalg.indexing.NDArrayIndex.interval; + +/** + * Cropping layer for 3D convolutional neural networks. + * Allows cropping to be done separately for upper and lower bounds of + * depth, height and width dimensions. 
+ * + * @author Max Pumperla + */ +public class Cropping3DLayer extends AbstractLayer<org.deeplearning4j.nn.conf.layers.convolutional.Cropping3D> { + + private int[] cropping; //[cropLeftD, cropRightD, cropLeftH, cropRightH, cropLeftW, cropRightW] + + public Cropping3DLayer(NeuralNetConfiguration conf) { + super(conf); + this.cropping = ((org.deeplearning4j.nn.conf.layers.convolutional.Cropping3D) conf.getLayer()).getCropping(); + } + + @Override + public boolean isPretrainLayer() { + return false; + } + + @Override + public void clearNoiseWeightParams() { + //No op + } + + @Override + public Type type() { + return Type.CONVOLUTIONAL3D; + } + + @Override + public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + int[] inShape = input.shape(); + INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, inShape, 'c'); + INDArray epsNextSubset = inputSubset(epsNext); + epsNextSubset.assign(epsilon); + return new Pair<>((Gradient) new DefaultGradient(), epsNext); + } + + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + INDArray ret = inputSubset(input); + ret = workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, ret); + workspaceMgr.validateArrayLocation(ArrayType.ACTIVATIONS, ret, false, false); + return ret; + } + + @Override + public Layer clone() { + return new Cropping3DLayer(conf.clone()); + } + + @Override + public double calcL1(boolean backpropParamsOnly) { + return 0; + } + + @Override + public double calcL2(boolean backpropParamsOnly) { + return 0; + } + + private INDArray inputSubset(INDArray from) { + //NCDHW format + return from.get(all(), all(), + interval(cropping[0], from.size(2)-cropping[1]), + interval(cropping[2], from.size(3)-cropping[3]), + interval(cropping[4], from.size(4)-cropping[5])); + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding3DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding3DLayer.java new file mode 100644 index 000000000000..7977b3b0087c --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding3DLayer.java @@ -0,0 +1,112 @@ +/*- + * + * * Copyright 2017 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ +package org.deeplearning4j.nn.layers.convolution; + +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.gradient.DefaultGradient; +import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.layers.AbstractLayer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.indexing.INDArrayIndex; +import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.primitives.Pair; + +/** + * Zero padding 3D layer for convolutional neural networks.
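+ * For example, with {@code padding = {1, 1, 2, 2, 3, 3}} an NCDHW input of shape [mb, ch, d, h, w] is padded to [mb, ch, d + 2, h + 4, w + 6] (illustrative; follows the activate() implementation below).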
+ * Allows padding to be done separately for left and right boundaries + * in all three spatial input dimensions. + * + * @author Max Pumperla + */ +public class ZeroPadding3DLayer extends AbstractLayer<org.deeplearning4j.nn.conf.layers.ZeroPadding3DLayer> { + + private int[] padding; // [padLeft1, padRight1, padLeft2, padRight2, padLeft3, padRight3] + + public ZeroPadding3DLayer(NeuralNetConfiguration conf) { + super(conf); + this.padding = ((org.deeplearning4j.nn.conf.layers.ZeroPadding3DLayer) conf.getLayer()).getPadding(); + } + + @Override + public boolean isPretrainLayer() { + return false; + } + + @Override + public void clearNoiseWeightParams() { + //No op + } + + @Override + public Type type() { + return Type.CONVOLUTIONAL3D; + } + + @Override + public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + int[] inShape = input.shape(); + + INDArray epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(), + NDArrayIndex.interval(padding[0], padding[0] + inShape[2]), + NDArrayIndex.interval(padding[2], padding[2] + inShape[3]), + NDArrayIndex.interval(padding[4], padding[4] + inShape[4])); + + epsNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext); + return new Pair<>((Gradient) new DefaultGradient(), epsNext); + } + + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + int[] inShape = input.shape(); + int outD = inShape[2] + padding[0] + padding[1]; + int outH = inShape[3] + padding[2] + padding[3]; + int outW = inShape[4] + padding[4] + padding[5]; + int[] outShape = new int[] {inShape[0], inShape[1], outD, outH, outW}; + + INDArray out = workspaceMgr.create(ArrayType.ACTIVATIONS, outShape, 'c'); + + out.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.all(), + NDArrayIndex.interval(padding[0], padding[0] + inShape[2]), + NDArrayIndex.interval(padding[2], padding[2] + inShape[3]), + NDArrayIndex.interval(padding[4], padding[4] + inShape[4])}, + input); + + return out; + } + + @Override + public Layer clone() { + return new ZeroPadding3DLayer(conf.clone()); + } + + @Override + public double calcL1(boolean backpropParamsOnly) { + return 0; + } + + @Override + public double calcL2(boolean backpropParamsOnly) { + return 0; + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling3DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling3DLayer.java new file mode 100644 index 000000000000..fa0bb67de7a0 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling3DLayer.java @@ -0,0 +1,278 @@ +/*- + * + * * Copyright 2015 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License.
+ * + */ + +package org.deeplearning4j.nn.layers.convolution.subsampling; + +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.exception.DL4JInvalidInputException; +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.PoolingType; +import org.deeplearning4j.nn.gradient.DefaultGradient; +import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.layers.AbstractLayer; +import org.deeplearning4j.nn.params.Convolution3DParamInitializer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.deeplearning4j.util.Convolution3DUtils; +import org.deeplearning4j.util.ConvolutionUtils; +import org.nd4j.linalg.activations.IActivation; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.CustomOp; +import org.nd4j.linalg.api.ops.DynamicCustomOp; +import org.nd4j.linalg.api.ops.Op; +import org.nd4j.linalg.api.ops.impl.layers.convolution.LegacyPooling2D; +import org.nd4j.linalg.api.ops.impl.transforms.IsMax; +import org.nd4j.linalg.api.shape.Shape; +import org.nd4j.linalg.convolution.Convolution; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; +import org.nd4j.linalg.util.ArrayUtil; +import org.nd4j.util.OneTimeLogger; + +import java.util.Arrays; +import java.util.Properties; + + +/** + * Subsampling 3D layer, used for downsampling a 3D convolution. + * + * @author Max Pumperla + */ +@Slf4j +public class Subsampling3DLayer extends AbstractLayer<org.deeplearning4j.nn.conf.layers.Subsampling3DLayer> { + + protected ConvolutionMode convolutionMode; + + public Subsampling3DLayer(NeuralNetConfiguration conf) { + super(conf); + this.convolutionMode = + ((org.deeplearning4j.nn.conf.layers.Subsampling3DLayer) conf.getLayer()).getConvolutionMode(); + } + + public Subsampling3DLayer(NeuralNetConfiguration conf, INDArray input) { + super(conf, input); + } + + + @Override + public double calcL2(boolean backpropParamsOnly) { + return 0; + } + + @Override + public double calcL1(boolean backpropParamsOnly) { + return 0; + } + + @Override + public Type type() { + return Type.SUBSAMPLING; + } + + @Override + public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + + int miniBatch = input.size(0); + int inChannels = input.size(1); + int inD = input.size(2); + int inH = input.size(3); + int inW = input.size(4); + + int[] kernel = layerConf().getKernelSize(); + int[] strides = layerConf().getStride(); + int[] dilation = new int[]{1, 1, 1}; + + int[] pad; + int[] outSize; + if (convolutionMode == ConvolutionMode.Same) { + outSize = Convolution3DUtils.get3DOutputSize( + input, kernel, strides, null, convolutionMode, dilation, true); + pad = Convolution3DUtils.get3DSameModeTopLeftPadding( + outSize, new int[]{inD, inH, inW}, kernel, strides, dilation); + } else { + pad = layerConf().getPadding(); + } + + INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, + miniBatch * inChannels * inD * inH * inW); + outEpsilon = outEpsilon.reshape('c', miniBatch, inChannels, inD, inH, inW); + + + int[] intArgs = new int[]{ + kernel[0], kernel[1], kernel[2], + strides[0], strides[1], strides[2], + pad[0], pad[1], pad[2], + convolutionMode == ConvolutionMode.Same ? 1 : 0, + 0 // isNCDHW, i.e.
channels first by default + }; + + String opName = layerConf().getPoolingType() == PoolingType.MAX ? "maxpool3dnew_bp" : "avgpool3dnew_bp"; + + CustomOp op = DynamicCustomOp.builder(opName) + .addInputs(input, epsilon) + .addIntegerArguments(intArgs) + .addOutputs(outEpsilon) + .callInplace(false) + .build(); + + Nd4j.getExecutioner().exec(op); + + Gradient retGradient = new DefaultGradient(); + return new Pair<>(retGradient, outEpsilon); + } + + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + if (training && !dropoutApplied && layerConf().getIDropout() != null) { + applyDropOutIfNecessary(true, workspaceMgr); + } + + if (input.rank() != 5) { + throw new DL4JInvalidInputException("Got rank " + input.rank() + + " array as input to Subsampling3DLayer with shape " + Arrays.toString(input.shape()) + + ". Expected rank 5 array with shape [minibatchSize, channels, " + + "inputDepth, inputHeight, inputWidth]. " + + layerId()); + } + + int miniBatch = input.size(0); + int inChannels = input.size(1); + int inD = input.size(2); + int inH = input.size(3); + int inW = input.size(4); + + int[] kernel = layerConf().getKernelSize(); + int[] strides = layerConf().getStride(); + int[] dilation = new int[]{1, 1, 1}; + int[] pad; + int[] outSize; + if (convolutionMode == ConvolutionMode.Same) { + int[] inShape = new int[]{inD, inH, inW}; + outSize = Convolution3DUtils.get3DOutputSize( + input, kernel, strides, null, convolutionMode, dilation, true); + pad = Convolution3DUtils.get3DSameModeTopLeftPadding(outSize, inShape, kernel, strides, dilation); + } else { + pad = layerConf().getPadding(); + outSize = Convolution3DUtils.get3DOutputSize( + input, kernel, strides, pad, convolutionMode, dilation, true); + } + int outD = outSize[0]; + int outH = outSize[1]; + int outW = outSize[2]; + + String opName = layerConf().getPoolingType() == PoolingType.MAX ? "maxpool3dnew" : "avgpool3dnew"; + + INDArray output = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, + new int[]{miniBatch, inChannels, outD, outH, outW}, 'c'); + + int[] intArgs = new int[]{ + kernel[0], kernel[1], kernel[2], + strides[0], strides[1], strides[2], + pad[0], pad[1], pad[2], + convolutionMode == ConvolutionMode.Same ? 1 : 0, + 0 // isNCDHW, i.e. 
channels first by default + }; + + CustomOp op = DynamicCustomOp.builder(opName) + .addInputs(input) + .addIntegerArguments(intArgs) + .addOutputs(output) + .callInplace(false) + .build(); + + Nd4j.getExecutioner().exec(op); + + return output; + } + + @Override + public Layer transpose() { + throw new UnsupportedOperationException(layerId()); + } + + @Override + public Layer clone() { + return new Subsampling3DLayer(conf.clone()); + } + + @Override + public boolean isPretrainLayer() { + return false; + } + + @Override + public void clearNoiseWeightParams() { + //no op + } + + @Override + public Gradient gradient() { + throw new UnsupportedOperationException("Not supported - no parameters"); + } + + @Override + public void fit() { + + } + + @Override + public int numParams() { + return 0; + } + + @Override + public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) { + } + + @Override + public double score() { + return 0; + } + + @Override + public void accumulateScore(double accum) { + throw new UnsupportedOperationException(layerId()); + } + + + @Override + public void update(INDArray gradient, String paramType) { + + } + + @Override + public INDArray params() { + return null; + } + + @Override + public INDArray getParam(String param) { + return params(); + } + + @Override + public void setParams(INDArray params) { + + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java index 28a44e8a8c81..7adbe8c5051d 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java @@ -61,10 +61,10 @@ public Upsampling1D(NeuralNetConfiguration conf, INDArray input) { public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { assertInputSet(true); - int size = ((BaseUpsamplingLayer) layerConf()).getSize(); + int[] size = ((BaseUpsamplingLayer) layerConf()).getSize(); epsilon = epsilon.reshape(epsilon.size(0), epsilon.size(1), epsilon.size(2), 1); // we replicate the error term times "size" so that backprop works properly on it - epsilon = epsilon.repeat(3, size); + epsilon = epsilon.repeat(3, size[0]); INDArray originalInput = input; input = input.reshape(input.size(0), input.size(1), input.size(2), 1); @@ -78,14 +78,11 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { INDArray outEpsilon = Nd4j.create(miniBatch * inDepth * inH * inW); INDArray reshapedEpsilon = outEpsilon.reshape('c', miniBatch, inDepth, inH, inW); - INDArray forwardOutput = preOutput(true, true, LayerWorkspaceMgr.noWorkspaces()); - forwardOutput = forwardOutput.reshape( - forwardOutput.size(0), forwardOutput.size(1), forwardOutput.size(2), 1); - forwardOutput = forwardOutput.repeat(3, size); + int[] intArgs = new int[] {1}; // 1 is for NCHW CustomOp op = DynamicCustomOp.builder("upsampling_bp") - .addIntegerArguments(size) - .addInputs(forwardOutput, epsilon) + .addIntegerArguments(intArgs) + .addInputs(input, epsilon) + .addOutputs(reshapedEpsilon) + .callInplace(false) + .build(); @@ -97,11 +94,11 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { input = originalInput; // Since we aggregate the gradient across "size" slices, we need to normalize afterwards.
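// (For instance, with size = {2} each input element was replicated to two output positions, so the epsilon summed by upsampling_bp is divided by 2.)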
- return new Pair<>(gradient, reshapedEpsilon.divi(size)); + return new Pair<>(gradient, reshapedEpsilon.divi(size[0])); } @Override - protected int getSize(){ + protected int[] getSize(){ return ((org.deeplearning4j.nn.conf.layers.Upsampling1D)conf.getLayer()).getSize(); } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java index e43d3ad25e91..05326d52d1ab 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java @@ -73,7 +73,7 @@ public double calcL1(boolean backpropParamsOnly) { @Override public Type type() { - return Type.SUBSAMPLING; + return Type.UPSAMPLING; } @@ -86,17 +86,16 @@ public Pair backpropGradient(INDArray epsilon, LayerWorkspac int inH = input.size(2); int inW = input.size(3); - int size = getSize(); - INDArray reshapedEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, new int[]{miniBatch, inDepth, inH, inW}, 'c'); - INDArray forwardOutput = preOutput(true, true, workspaceMgr); - Gradient gradient = new DefaultGradient(); + int[] intArgs = new int[] {1}; // 1 is for NCHW + + CustomOp op = DynamicCustomOp.builder("upsampling_bp") - .addIntegerArguments(size) - .addInputs(forwardOutput, epsilon) + .addIntegerArguments(intArgs) + .addInputs(input, epsilon) .addOutputs(reshapedEpsilon) .callInplace(false) .build(); @@ -105,7 +104,7 @@ public Pair backpropGradient(INDArray epsilon, LayerWorkspac return new Pair<>(gradient, reshapedEpsilon); } - protected int getSize(){ + protected int[] getSize(){ return layerConf().getSize(); } @@ -129,19 +128,20 @@ protected INDArray preOutput(boolean training, boolean forBackprop, LayerWorkspa int inH = input.size(2); int inW = input.size(3); - int size = getSize(); - int outH = inH * size; - int outW = inW * size; + int[] size = getSize(); + int outH = inH * size[0]; + int outW = inW * size[1]; INDArray reshapedOutput = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, new int[]{miniBatch, inDepth, outH, outW}, 'c'); - Upsampling upsampling = Upsampling.sameDiffBuilder() - .inPlace(false) - .inputArrays(new INDArray[]{input}) - .outputs(new INDArray[]{reshapedOutput}) - .scaleFactor(size) - .build(); + int[] intArgs = new int[] {size[0], size[1], 1}; // 1 is for NCHW + CustomOp upsampling = DynamicCustomOp.builder("upsampling2d") + .addIntegerArguments(intArgs) + .addInputs(input) + .addOutputs(reshapedOutput) + .callInplace(false) + .build(); Nd4j.getExecutioner().exec(upsampling); return reshapedOutput; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java new file mode 100644 index 000000000000..030ab3857eeb --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java @@ -0,0 +1,247 @@ +/*- + * + * * Copyright 2015 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. 
+ * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package org.deeplearning4j.nn.layers.convolution.upsampling; + +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.exception.DL4JInvalidInputException; +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.conf.CacheMode; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.gradient.DefaultGradient; +import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.layers.AbstractLayer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.linalg.api.memory.MemoryWorkspace; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.CustomOp; +import org.nd4j.linalg.api.ops.DynamicCustomOp; +import org.nd4j.linalg.api.ops.impl.layers.convolution.Upsampling; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; + +import java.util.Arrays; + + +/** + * 3D Upsampling layer. + *

+ * Used for upsampling a 3D convolution + * + * @author Max Pumperla + */ +@Slf4j +public class Upsampling3D extends AbstractLayer<org.deeplearning4j.nn.conf.layers.Upsampling3D> { + + protected INDArray preOutput; //cached forward-pass output, reused during backprop + + public Upsampling3D(NeuralNetConfiguration conf) { + super(conf); + } + + public Upsampling3D(NeuralNetConfiguration conf, INDArray input) { + super(conf, input); + } + + + @Override + public double calcL2(boolean backpropParamsOnly) { + return 0; + } + + @Override + public double calcL1(boolean backpropParamsOnly) { + return 0; + } + + @Override + public Type type() { + return Type.UPSAMPLING; + } + + + @Override + public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + + // Assumes NCDHW order + int miniBatch = input.size(0); + int inChannels = input.size(1); + int inD = input.size(2); + int inH = input.size(3); + int inW = input.size(4); + + int[] intArgs = new int[] {1}; // 1 is channels first + + INDArray reshapedEpsilon = workspaceMgr.createUninitialized( + ArrayType.ACTIVATION_GRAD, new int[]{miniBatch, inChannels, inD, inH, inW}, 'c'); + + + Gradient gradient = new DefaultGradient(); + + CustomOp op = DynamicCustomOp.builder("upsampling3d_bp") + .addIntegerArguments(intArgs) + .addInputs(input, epsilon) + .addOutputs(reshapedEpsilon) + .callInplace(false) + .build(); + Nd4j.getExecutioner().exec(op); + + return new Pair<>(gradient, reshapedEpsilon); + } + + protected int[] getSize() { + return layerConf().getSize(); + } + + protected INDArray preOutput(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + applyDropOutIfNecessary(training, workspaceMgr); + + if (input.rank() != 5) { + throw new DL4JInvalidInputException("Got rank " + input.rank() + " array as input to Upsampling3DLayer with shape " + Arrays.toString(input.shape()) + ". Expected rank 5 array with shape " + "[minibatchSize, channels, inputDepth, inputHeight, inputWidth]. " + layerId()); + } + + if (preOutput != null && forBackprop) { + return preOutput; + } + + int miniBatch = input.size(0); + int inChannels = input.size(1); + int inD = input.size(2); + int inH = input.size(3); + int inW = input.size(4); + + int[] size = getSize(); + int outD = inD * size[0]; + int outH = inH * size[1]; + int outW = inW * size[2]; + + int[] intArgs = new int[] {size[0], size[1], size[2], 1}; // 1 is channels first + + INDArray reshapedOutput = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, + new int[]{miniBatch, inChannels, outD, outH, outW}, 'c'); + + + CustomOp upsampling = DynamicCustomOp.builder("upsampling3d") + .addIntegerArguments(intArgs) + .addInputs(input) + .addOutputs(reshapedOutput) + .callInplace(false) + .build(); + Nd4j.getExecutioner().exec(upsampling); + + return reshapedOutput; + } + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + applyDropOutIfNecessary(training, workspaceMgr); + + if (cacheMode == null) + cacheMode = CacheMode.NONE; + + INDArray z = preOutput(training, false, workspaceMgr); + + // we do cache only if cache workspace exists.
Skip otherwise + if (training && cacheMode != CacheMode.NONE && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE) + && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) { + try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) { + preOutput = z.unsafeDuplication(); + } + } + return z; + } + + @Override + public Layer transpose() { + throw new UnsupportedOperationException(layerId()); + } + + @Override + public Layer clone() { + return new Upsampling3D(conf.clone()); + } + + @Override + public boolean isPretrainLayer() { + return false; + } + + @Override + public void clearNoiseWeightParams() { + //No op + } + + @Override + public Gradient gradient() { + throw new UnsupportedOperationException("Not supported - no parameters"); + } + + @Override + public void fit() { + + } + + @Override + public int numParams() { + return 0; + } + + @Override + public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public double score() { + return 0; + } + + @Override + public void accumulateScore(double accum) { + throw new UnsupportedOperationException(layerId()); + } + + + @Override + public void update(INDArray gradient, String paramType) { + + } + + @Override + public INDArray params() { + return null; + } + + @Override + public INDArray getParam(String param) { + return params(); + } + + @Override + public void setParams(INDArray params) { + + } + +}
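For orientation, the conf-side builders introduced in this patch compose like their 2D counterparts. The sketch below is illustrative only, not code from this patch: the `Convolution3D`, `ZeroPadding3DLayer` and `Cropping3D` builders are used as defined above, while the `Subsampling3DLayer.Builder` pooling-type/kernel/stride calls are assumed to mirror `SubsamplingLayer`'s builder, and the class name `Cnn3DConfigSketch` plus all sizes are invented for the example. The `Cnn3DToFeedForwardPreProcessor` arguments follow the (outDepth, outHeight, outWidth, channels, NCDHW) order used in the gradient-check tests.

```java
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.Convolution3D;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.Subsampling3DLayer;
import org.deeplearning4j.nn.conf.layers.ZeroPadding3DLayer;
import org.deeplearning4j.nn.conf.layers.convolutional.Cropping3D;
import org.deeplearning4j.nn.conf.preprocessor.Cnn3DToFeedForwardPreProcessor;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class Cnn3DConfigSketch {

    public static void main(String[] args) {
        int depth = 8, height = 8, width = 8, channels = 2, nClasses = 10;

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(42)
                .list()
                // 2x2x2 3D convolution over NCDHW input; Same mode keeps 8x8x8
                .layer(0, new Convolution3D.Builder()
                        .kernelSize(2, 2, 2).nIn(channels).nOut(4)
                        .activation(Activation.RELU)
                        .convolutionMode(ConvolutionMode.Same)
                        .dataFormat(Convolution3D.DataFormat.NCDHW).build())
                // pad one voxel at both ends of depth/height/width: 8x8x8 -> 10x10x10
                .layer(1, new ZeroPadding3DLayer.Builder(new int[]{1, 1, 1, 1, 1, 1}).build())
                // crop the padding off again: 10x10x10 -> 8x8x8
                .layer(2, new Cropping3D.Builder(1, 1, 1).build())
                // 2x2x2 average pooling, stride 2: 8x8x8 -> 4x4x4 (builder signature assumed)
                .layer(3, new Subsampling3DLayer.Builder(Subsampling3DLayer.PoolingType.AVG)
                        .kernelSize(2, 2, 2).stride(2, 2, 2).build())
                .layer(4, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nOut(nClasses).build())
                // flatten 4 channels of 4x4x4 activations for the output layer
                .inputPreProcessor(4, new Cnn3DToFeedForwardPreProcessor(4, 4, 4, 4, true))
                // note the (depth, height, width, channels) argument order fixed by this patch
                .setInputType(InputType.convolutional3D(depth, height, width, channels))
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        System.out.println("Parameter count: " + net.numParams());
    }
}
```

With ConvolutionMode.Truncate instead of Same, each spatial dimension of the convolution output would shrink to (size - kernel) / stride + 1, and the preprocessor dimensions would have to be adjusted to match.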